diff --git a/Cargo.lock b/Cargo.lock
index 6c02af47..0162c8c0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1271,6 +1271,18 @@ dependencies = [
  "zune-inflate",
 ]
 
+[[package]]
+name = "fallible-iterator"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
+
+[[package]]
+name = "fallible-streaming-iterator"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
+
 [[package]]
 name = "fancy-regex"
 version = "0.11.0"
@@ -1694,6 +1706,15 @@ dependencies = [
  "foldhash",
 ]
 
+[[package]]
+name = "hashlink"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af"
+dependencies = [
+ "hashbrown 0.14.5",
+]
+
 [[package]]
 name = "hassle-rs"
 version = "0.10.0"
@@ -1771,6 +1792,39 @@ dependencies = [
  "syn 2.0.85",
 ]
 
+[[package]]
+name = "http"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258"
+dependencies = [
+ "bytes",
+ "fnv",
+ "itoa",
+]
+
+[[package]]
+name = "http-cache-semantics"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92baf25cf0b8c9246baecf3a444546360a97b569168fdf92563ee6a47829920c"
+dependencies = [
+ "http",
+ "http-serde",
+ "serde",
+ "time",
+]
+
+[[package]]
+name = "http-serde"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1133cafcce27ea69d35e56b3a8772e265633e04de73c5f4e1afdffc1d19b5419"
+dependencies = [
+ "http",
+ "serde",
+]
+
 [[package]]
 name = "httpdate"
 version = "1.0.3"
@@ -1883,6 +1937,7 @@ dependencies = [
  "anstyle",
  "anyhow",
  "base64 0.22.1",
+ "bincode",
  "bytemuck",
  "clap",
  "comrak",
@@ -1896,6 +1951,8 @@ dependencies = [
  "glyphon",
  "html-escape",
  "html5ever",
+ "http",
+ "http-cache-semantics",
  "human-panic",
  "image",
  "indexmap 2.6.0",
@@ -1914,6 +1971,7 @@ dependencies = [
  "pretty_assertions",
  "raw-window-handle",
  "resvg",
+ "rusqlite",
  "serde",
  "serde_yaml",
  "smart-debug",
@@ -1928,6 +1986,7 @@ dependencies = [
  "two-face",
  "twox-hash",
  "ureq",
+ "url",
  "wgpu",
  "winit",
 ]
@@ -2188,6 +2247,17 @@ dependencies = [
  "redox_syscall 0.5.7",
 ]
 
+[[package]]
+name = "libsqlite3-sys"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f"
+dependencies = [
+ "cc",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "linked-hash-map"
 version = "0.5.6"
@@ -3549,6 +3619,20 @@ version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97"
 
+[[package]]
+name = "rusqlite"
+version = "0.31.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae"
+dependencies = [
+ "bitflags 2.6.0",
+ "fallible-iterator",
+ "fallible-streaming-iterator",
+ "hashlink",
+ "libsqlite3-sys",
+ "smallvec",
+]
+
 [[package]]
 name = "rust-ini"
 version = "0.18.0"
@@ -4549,6 +4633,7 @@ checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a"
 dependencies = [
  "base64 0.22.1",
  "flate2",
+ "http",
  "log",
  "once_cell",
  "rustls",
@@ -4639,6 +4724,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
 
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
 [[package]]
 name = "vec_map"
 version = "0.8.2"
diff --git a/Cargo.toml b/Cargo.toml
index 444dad29..8504232a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,6 +37,8 @@ anstream = "0.6.17"
 anstyle = "1.0.9"
 # Easier error handling
 anyhow = "1.0.91"
+# (De)serializing cache data
+bincode = "1.3.3"
 # System preferred color scheme detection
 dark-light = "1.1.1"
 # System specific directories
@@ -51,6 +53,8 @@ glyphon = "0.3"
 html-escape = "0.2.13"
 # Parsing the HTML document that the markdown+html was converted into
 html5ever = "0.27.0"
+http = "1.1.0"
+http-cache-semantics = "2.1.0"
 # Provides some extra helpers that we use for our custom panic hook
 human-panic = "2.0.0"
 # Generic image decoding
@@ -74,7 +78,9 @@ pollster = "0.4.0"
 raw-window-handle = "0.5.2"
 # SVG rendering
 resvg = "0.39.0"
-# Parses the optional YAML frontmatter (replace with just a yaml parser)
+# SQLite DB for our image cache
+rusqlite = { version = "0.31.0", features = ["bundled"] }
+# Parses the optional YAML frontmatter (TODO: replace with just a YAML parser)
 serde_yaml = "0.9.34"
 # Easy `Debug` formatting changes used to keep snapshot tests more succinct
 smart-debug = "0.0.3"
@@ -92,7 +98,8 @@ two-face = "0.4.0"
 # More text hashing...
 twox-hash = "1.6.3"
 # HTTP client for requesting images from urls
-ureq = "2.10.1"
+ureq = { version = "2.10.1", features = ["http-crate"] }
+url = "2.5.0"
 
 # Cross platform GPU magic sauce
 wgpu = "0.16"
@@ -195,13 +202,18 @@ lto = true
 
 # Selectively bump up opt level for some dependencies to improve dev build perf
 [profile.dev.package]
-ttf-parser.opt-level = 2
-rustybuzz.opt-level = 2
+backtrace.opt-level = 2
 cosmic-text.opt-level = 2
-png.opt-level = 2
 fontdb.opt-level = 2
+gif.opt-level = 2
+image.opt-level = 2
+image-webp.opt-level = 2
+lz4_flex.opt-level = 2
 miniz_oxide.opt-level = 2
-backtrace.opt-level = 2
+png.opt-level = 2
+rustybuzz.opt-level = 2
+tiny-skia.opt-level = 2
+ttf-parser.opt-level = 2
 
 [lints.rust.unexpected_cfgs]
 level = "warn"
diff --git a/assets/test_data/cargo_public_api.webp b/assets/test_data/cargo_public_api.webp
index 23c0c10f..820ae73a 100644
Binary files a/assets/test_data/cargo_public_api.webp and b/assets/test_data/cargo_public_api.webp differ
diff --git a/src/file_watcher/mod.rs b/src/file_watcher/mod.rs
index 053d8f64..b5240204 100644
--- a/src/file_watcher/mod.rs
+++ b/src/file_watcher/mod.rs
@@ -3,6 +3,7 @@ mod tests;
 
 use std::path::{Path, PathBuf};
 use std::sync::mpsc;
+use std::thread;
 use std::time::Duration;
 
 use crate::InlyneEvent;
@@ -106,9 +107,12 @@ impl Watcher {
         let notify_watcher =
             new_debouncer(Duration::from_millis(10), None, MsgHandler(msg_tx)).unwrap();
 
-        std::thread::spawn(move || {
-            endlessly_handle_messages(notify_watcher, msg_rx, reload_callback, file_path);
-        });
+        thread::Builder::new()
+            .name("file-watcher".into())
+            .spawn(move || {
+                endlessly_handle_messages(notify_watcher, msg_rx, reload_callback, file_path);
+            })
+            .expect("failed to spawn thread");
 
         watcher
     }
diff --git a/src/file_watcher/tests.rs b/src/file_watcher/tests.rs
index e7740e8d..14247963 100644
--- a/src/file_watcher/tests.rs
+++ b/src/file_watcher/tests.rs
@@ -3,6 +3,8 @@ use std::path::{Path, PathBuf};
 use std::sync::mpsc;
 use std::time::Duration;
+use crate::test_utils::temp;
+
 use super::{Callback, Watcher};
 use tempfile::TempDir;
 
@@ -62,11 +64,7 @@ impl Delays {
 
 fn init_test_env() -> (TestEnv, TempDir) {
     // Create our dummy test env
-    let temp_dir = tempfile::Builder::new()
-        .prefix("inlyne-tests-")
-        .tempdir()
-        .unwrap();
-    let base = temp_dir.path();
+    let (temp_dir, base) = temp::dir();
     let main_file = base.join("main.md");
     let rel_file = base.join("rel.md");
     fs::write(&main_file, "# Main\n\n[rel](./rel.md)").unwrap();
diff --git a/src/history.rs b/src/history.rs
index cd7d9a4a..8820d170 100644
--- a/src/history.rs
+++ b/src/history.rs
@@ -59,14 +59,11 @@ mod tests {
     use std::fs;
 
     use super::*;
+    use crate::test_utils::temp;
 
     #[test]
     fn sanity() {
-        let temp_dir = tempfile::Builder::new()
-            .prefix("inlyne-tests-")
-            .tempdir()
-            .unwrap();
-        let temp_path = temp_dir.path().canonicalize().unwrap();
+        let (_temp_dir, temp_path) = temp::dir();
 
         let root = temp_path.join("a");
         let fork1 = temp_path.join("b");
diff --git a/src/image/cache/global/db.rs b/src/image/cache/global/db.rs
new file mode 100644
index 00000000..473036d0
--- /dev/null
+++ b/src/image/cache/global/db.rs
@@ -0,0 +1,145 @@
+use std::{
+    fs,
+    path::{Path, PathBuf},
+    time::SystemTime,
+};
+
+use crate::{
+    image::cache::{global::RemoteMeta, RemoteKey, StableImage},
+    utils,
+};
+
+use anyhow::Context;
+use http_cache_semantics::CachePolicy;
+use rusqlite::{types::FromSqlError, Connection, OptionalExtension};
+
+use super::wrappers::{CachePolicyBytes, StableImageBytes, SystemTimeSecs};
+
+/// The current version for our database file
+///
+/// We're a cache, so we don't have to worry about preserving data permanently. If we want to make
+/// some really nasty changes without dealing with migrations then we can bump this version and
+/// rotate to a totally new file entirely. Old versions are handled during garbage collection
+const VERSION: u32 = 0;
+
+fn file_name() -> String {
+    format!("image-cache-v{VERSION}.db3")
+}
+
+const SCHEMA: &str = include_str!("db_schema.sql");
+
+// TODO: create a connection pool so that we can actually re-use connections (and their cache)
+// instead of having to create a new one for each worker or serialize all cache interactions
+pub struct Db(Connection);
+
+impl Db {
+    pub fn default_path() -> anyhow::Result<PathBuf> {
+        let cache_dir = utils::inlyne_cache_dir().context("Failed to locate cache dir")?;
+        let db_path = cache_dir.join(file_name());
+        Ok(db_path)
+    }
+
+    pub fn open_or_create(path: &Path) -> anyhow::Result<Self> {
+        let db_dir = path.parent().with_context(|| {
+            format!(
+                "Unable to locate database directory from: {}",
+                path.display()
+            )
+        })?;
+        fs::create_dir_all(db_dir)
+            .with_context(|| format!("Failed creating db directory at: {}", db_dir.display()))?;
+        let conn = Connection::open(path)?;
+        Self::create_schema(&conn)?;
+        Ok(Self(conn))
+    }
+
+    fn create_schema(conn: &Connection) -> anyhow::Result<()> {
+        conn.execute(SCHEMA, ())?;
+        Ok(())
+    }
+
+    pub fn get_meta(&self, remote: &RemoteKey) -> rusqlite::Result<Option<RemoteMeta>> {
+        let mut stmt = self
+            .0
+            .prepare_cached("select generation, last_used, policy from images where url = ?1")?;
+        stmt.query_row([&remote.0], |row| {
+            let generation = row.get(0)?;
+            let last_used = row.get::<_, SystemTimeSecs>(1)?.into();
+            let policy = (&row.get::<_, CachePolicyBytes>(2)?)
+                .try_into()
+                .map_err(|err| FromSqlError::Other(Box::new(err)))?;
+            Ok(RemoteMeta {
+                generation,
+                last_used,
+                policy,
+            })
+        })
+        .optional()
+        .map_err(Into::into)
+    }
+
+    pub fn get_data(
+        &self,
+        remote: &RemoteKey,
+        generation: u32,
+    ) -> rusqlite::Result<Option<StableImage>> {
+        let mut stmt = self
+            .0
+            .prepare_cached("select image from images where url = ?1 and generation = ?2")?;
+        stmt.query_row((&remote.0, generation), |row| {
+            let image = row
+                .get::<_, StableImageBytes>(0)?
+                .try_into()
+                .map_err(|err| FromSqlError::Other(Box::new(err)))?;
+            Ok(image)
+        })
+        .optional()
+        .map_err(Into::into)
+    }
+
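+    /// Insert a cache entry, replacing any existing entry for the same URL
+    ///
+    /// Note that `insert or replace` below assigns a freshly randomized generation, so a reader
+    /// that still holds the old generation will fail its `generation = ?` check in `get_data`
+    /// rather than silently observing the new image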
+    pub fn insert(
+        &mut self,
+        remote: &RemoteKey,
+        policy: &CachePolicy,
+        image: StableImage,
+        now: SystemTime,
+    ) -> anyhow::Result<()> {
+        let url = &remote.0;
+        let now: SystemTimeSecs = now.try_into()?;
+        let policy: CachePolicyBytes = policy.try_into()?;
+        let image: StableImageBytes = image.into();
+
+        let mut stmt = self.0.prepare_cached(
+            "insert or replace into images (url, last_used, policy, image, generation)
+            values (?1, ?2, ?3, ?4, abs(random() % 1000000))",
+        )?;
+        stmt.execute((url, now, policy, image))?;
+        Ok(())
+    }
+
+    pub fn refresh(
+        &self,
+        remote: &RemoteKey,
+        generation: u32,
+        policy: &CachePolicy,
+    ) -> anyhow::Result<()> {
+        todo!();
+    }
+
+    pub fn refresh_last_used(
+        &self,
+        remote: &RemoteKey,
+        generation: u32,
+        now: SystemTime,
+    ) -> anyhow::Result<()> {
+        let url = &remote.0;
+        let now: SystemTimeSecs = now.try_into()?;
+        // TODO: cache this query
+        self.0.execute(
+            "update images set last_used = ?1 where url = ?2 and generation = ?3",
+            (now, url, generation),
+        )?;
+        Ok(())
+    }
+}
diff --git a/src/image/cache/global/db_schema.sql b/src/image/cache/global/db_schema.sql
new file mode 100644
index 00000000..12f6c670
--- /dev/null
+++ b/src/image/cache/global/db_schema.sql
@@ -0,0 +1,7 @@
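+-- One row per cached remote image, keyed by URL:
+--   generation: randomized id, re-rolled whenever the row is replaced, letting readers detect
+--               that an entry changed out from under them
+--   last_used:  unix seconds, used by TTL-based garbage collection
+--   policy:     bincode-serialized `http_cache_semantics::CachePolicy`
+--   image:      `StableImage` encoded as described in `wrappers.rs`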
+create table if not exists images (
+    url text primary key,
+    generation int not null,
+    last_used int not null,
+    policy blob not null,
+    image blob not null
+) strict
diff --git a/src/image/cache/global/mod.rs b/src/image/cache/global/mod.rs
new file mode 100644
index 00000000..39ed88bb
--- /dev/null
+++ b/src/image/cache/global/mod.rs
@@ -0,0 +1,248 @@
+use std::{
+    fmt, fs,
+    path::{Path, PathBuf},
+    time::SystemTime,
+};
+
+use super::{RemoteKey, StableImage, StandardRequest};
+use crate::{image::cache::global, utils};
+
+use anyhow::Context;
+use http::request;
+use http_cache_semantics::{BeforeRequest, CachePolicy, RequestLike};
+use serde::{Deserialize, Serialize};
+
+mod db;
+pub mod wrappers;
+
+// The database is currently externally versioned, meaning that we switch to an entirely new file
+// when we bump the version
+// TODO: Garbage collection should also be adjusted to clean up unused databases over time
+const VERSION: u32 = 0;
+
+pub fn db_name() -> String {
+    format!("image-cache-v{VERSION}.db3")
+}
+
+fn db_path() -> anyhow::Result<PathBuf> {
+    let cache_dir = utils::inlyne_cache_dir().context("Failed to locate cache dir")?;
+    let db_path = cache_dir.join(db_name());
+    Ok(db_path)
+}
+
+pub struct Stats {
+    pub path: PathBuf,
+    pub inner: Option<StatsInner>,
+}
+
+pub struct StatsInner {
+    pub size: Bytes,
+}
+
+impl Stats {
+    pub fn detect() -> anyhow::Result<Self> {
+        let path = db_path()?;
+        path.try_into()
+    }
+}
+
+impl TryFrom<PathBuf> for Stats {
+    type Error = anyhow::Error;
+
+    fn try_from(path: PathBuf) -> Result<Self, Self::Error> {
+        let inner = if !path.is_file() {
+            None
+        } else {
+            let meta = fs::metadata(&path)?;
+            let size = meta.len().into();
+            let inner = StatsInner { size };
+            Some(inner)
+        };
+
+        Ok(Self { path, inner })
+    }
+}
+
+impl fmt::Display for Stats {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let Self { path, inner } = self;
+        match inner {
+            None => write!(f, "path (not found): {}", path.display()),
+            Some(inner) => {
+                writeln!(f, "path: {}", path.display())?;
+                write!(f, "total size: {}", inner.size)
+            }
+        }
+    }
+}
+
+pub struct Bytes(u64);
+
+impl From<u64> for Bytes {
+    fn from(bytes: u64) -> Self {
+        Self(bytes)
+    }
+}
+
+impl fmt::Display for Bytes {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut unit = "B";
+        let mut dividend = 1;
+        while self.0 / dividend / 1_024 > 1 {
+            unit = match unit {
+                "B" => "KiB",
+                "KiB" => "MiB",
+                _ => break,
+            };
+            dividend *= 1_024;
+        }
+
+        write!(f, "{} {}", self.0 / dividend, unit)
+    }
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+pub struct RemoteMeta {
+    // TODO: switch to a content hash or uuid v4
+    /// A generation used to uniquely identify this cache entry
+    ///
+    /// We use generations to keep track of the consistency of a cache entry between different
+    /// transactions. If we change the generation every time we invalidate the entry in some way
+    /// (e.g. changing the stored image) then we're able to keep track of whether we're still
+    /// referring to the same image in situations like initial validation, revalidation, etc.
+    pub generation: u32,
+    pub last_used: SystemTime,
+    pub policy: CachePolicy,
+}
+
+pub fn run_garbage_collector() -> anyhow::Result<()> {
+    let cache = Cache::load()?;
+    cache.run_garbage_collector()
+}
+
+pub struct Cache(db::Db);
+
+impl Cache {
+    pub fn load() -> anyhow::Result<Self> {
+        let db_path = db::Db::default_path()?;
+        Self::load_from_file(&db_path)
+    }
+
+    pub fn load_from_file(path: &Path) -> anyhow::Result<Self> {
+        let db = db::Db::open_or_create(path)?;
+        Ok(Self(db))
+    }
+
+    // TODO: rename to remove `remote_` since it's always remote now
+    pub fn check_remote_cache(
+        &self,
+        key: &RemoteKey,
+        now: SystemTime,
+    ) -> anyhow::Result<CacheCheck> {
+        let check = self.check_remote_cache_inner(key, now)?.unwrap_or_else(|| {
+            let req: StandardRequest = key.into();
+            let parts = (&req).into();
+            CacheCont::Miss(parts).into()
+        });
+        Ok(check)
+    }
+
+    // TODO: rename to remove `remote_` since it's always remote now
+    fn check_remote_cache_inner(
+        &self,
+        key: &RemoteKey,
+        now: SystemTime,
+    ) -> anyhow::Result<Option<CacheCheck>> {
+        fn is_corrupt_entry(err: &rusqlite::Error) -> bool {
+            use rusqlite::Error as E;
+
+            match err {
+                E::FromSqlConversionFailure(_, _, conv_err) => {
+                    conv_err.is::<wrappers::ConvertError>()
+                }
+                E::IntegralValueOutOfRange(_, _) => true,
+                _ => false,
+            }
+        }
+
+        let meta = match self.0.get_meta(key) {
+            Ok(Some(meta)) => meta,
+            Ok(None) => return Ok(None),
+            Err(err) if is_corrupt_entry(&err) => {
+                tracing::warn!(%key, %err, "Ignoring corrupt cache entry");
+                return Ok(None);
+            }
+            Err(err) => return Err(err.into()),
+        };
+        let req: StandardRequest = key.into();
+
+        let maybe_meta = match meta.policy.before_request(&req, now) {
+            BeforeRequest::Fresh(_) => {
+                let gen = meta.generation;
+                match self.0.get_data(key, gen) {
+                    Ok(Some(image)) => {
+                        self.0.refresh_last_used(key, gen, now)?;
+                        Some(CacheCheck::Fresh((meta.policy, image.into())))
+                    }
+                    Ok(None) => None,
+                    Err(err) if is_corrupt_entry(&err) => {
+                        tracing::warn!(%key, %err, "Ignoring corrupt cache entry");
+                        None
+                    }
+                    Err(err) => return Err(err.into()),
+                }
+            }
+            BeforeRequest::Stale { request, .. } => {
+                // NOTE: We compare the headers of the original request and `request` as a proxy
+                // for whether `http-cache-semantics` is trying to refresh our original data vs
+                // just sending the request through unchanged
+                if req.headers() == request.headers() {
+                    // No change to our usual headers means this is a new request
+                    Some(CacheCont::Miss(request).into())
+                } else {
+                    self.0
+                        .get_data(key, meta.generation)?
+                        .map(|image| CacheCont::TryRefresh((meta.policy, request, image)).into())
+                }
+            }
+        };
+
+        Ok(maybe_meta)
+    }
+
+    pub fn insert(
+        &mut self,
+        key: &RemoteKey,
+        policy: &CachePolicy,
+        image: StableImage,
+        now: SystemTime,
+    ) -> anyhow::Result<()> {
+        self.0.insert(key, policy, image, now)
+    }
+
+    pub fn run_garbage_collector(&self) -> anyhow::Result<()> {
+        // TODO: pass over and remove entries and then run compaction. Can get the size of various
+        // parts of the image data table to determine when we should actually run compaction
+        // (things generally run better when there are pages that can be reused instead of always
+        // compacting down to the minimal size)
+        todo!();
+    }
+}
+
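+/// The outcome of checking the cache for a remote key
+///
+/// A rough sketch of how a caller is expected to drive this (`send` is a hypothetical helper,
+/// not something defined in this module):
+///
+/// ```ignore
+/// match cache.check_remote_cache(&key, SystemTime::now())? {
+///     // Still fresh: serve the stored image without touching the network
+///     CacheCheck::Fresh((_policy, image)) => image,
+///     // Stale: send the conditional request in `parts`, falling back to `image` if the
+///     // server confirms our copy is still good
+///     CacheCheck::Cont(CacheCont::TryRefresh((_policy, parts, image))) => {
+///         send(parts, Some(image))?
+///     }
+///     // No usable entry: send the request in `parts` and `insert` the response
+///     CacheCheck::Cont(CacheCont::Miss(parts)) => send(parts, None)?,
+/// }
+/// ```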
+#[must_use]
+pub enum CacheCheck {
+    Fresh((CachePolicy, StableImage)),
+    Cont(CacheCont),
+}
+
+impl From<CacheCont> for CacheCheck {
+    fn from(cont: CacheCont) -> Self {
+        Self::Cont(cont)
+    }
+}
+
+#[must_use]
+pub enum CacheCont {
+    TryRefresh((CachePolicy, request::Parts, StableImage)),
+    Miss(request::Parts),
+}
diff --git a/src/image/cache/global/wrappers.rs b/src/image/cache/global/wrappers.rs
new file mode 100644
index 00000000..21c595d1
--- /dev/null
+++ b/src/image/cache/global/wrappers.rs
@@ -0,0 +1,225 @@
+use std::{
+    array, fmt,
+    time::{Duration, SystemTime, SystemTimeError},
+};
+
+use crate::image::{cache::StableImage, ImageData};
+
+use http_cache_semantics::CachePolicy;
+use rusqlite::types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef};
+
+#[derive(Debug)]
+pub enum ConvertError {
+    CachePolicy(bincode::Error),
+    Image(StableImageConvertError),
+}
+
+impl fmt::Display for ConvertError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::CachePolicy(err) => write!(f, "Invalid stored cache policy: {err}"),
+            Self::Image(err) => write!(f, "Invalid stored image: {err}"),
+        }
+    }
+}
+
+impl std::error::Error for ConvertError {}
+
+impl From<bincode::Error> for ConvertError {
+    fn from(err: bincode::Error) -> Self {
+        Self::CachePolicy(err)
+    }
+}
+
+impl From<StableImageConvertError> for ConvertError {
+    fn from(err: StableImageConvertError) -> Self {
+        Self::Image(err)
+    }
+}
+
+pub struct CachePolicyBytes(Vec<u8>);
+
+impl From<&CachePolicy> for CachePolicyBytes {
+    fn from(policy: &CachePolicy) -> Self {
+        let bytes = bincode::serialize(policy).unwrap();
+        Self(bytes)
+    }
+}
+
+impl TryFrom<&CachePolicyBytes> for CachePolicy {
+    type Error = ConvertError;
+
+    fn try_from(bytes: &CachePolicyBytes) -> Result<Self, Self::Error> {
+        let policy = bincode::deserialize(&bytes.0)?;
+        Ok(policy)
+    }
+}
+
+impl ToSql for CachePolicyBytes {
+    fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
+        self.0.to_sql()
+    }
+}
+
+impl FromSql for CachePolicyBytes {
+    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
+        let blob = value.as_blob()?;
+        Ok(Self(blob.to_vec()))
+    }
+}
+
+pub struct SystemTimeSecs(u64);
+
+impl TryFrom<SystemTime> for SystemTimeSecs {
+    type Error = SystemTimeError;
+
+    fn try_from(time: SystemTime) -> Result<Self, Self::Error> {
+        let since_unix_epoch = time.duration_since(SystemTime::UNIX_EPOCH)?;
+        Ok(Self(since_unix_epoch.as_secs()))
+    }
+}
+
+impl From<SystemTimeSecs> for SystemTime {
+    fn from(secs: SystemTimeSecs) -> Self {
+        SystemTime::UNIX_EPOCH + Duration::from_secs(secs.0)
+    }
+}
+
+impl ToSql for SystemTimeSecs {
+    fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
+        self.0.to_sql()
+    }
+}
+
+impl FromSql for SystemTimeSecs {
+    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
+        let secs = value.as_i64()?;
+        let secs: u64 = secs.try_into().map_err(|_| FromSqlError::InvalidType)?;
+        Ok(Self(secs))
+    }
+}
+
+/// The representation of how a [`StableImage`] is stored in the DB
+///
+/// The image gets stored as a blob of bytes with a variable-size footer. The footer ends with a
+/// byte that indicates the kind of the underlying [`StableImage`], and the size of the rest of
+/// the footer depends on that kind
+///
+/// The reason that we use a footer instead of a header is that it's easier to avoid needlessly
+/// copying around the bulky image data if that's what we root the blob around
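+///
+/// Concretely, a pre-decoded image is laid out as:
+///
+/// ```text
+/// [ lz4-compressed pixels ... | scale: u8 | dim_x: u32 BE | dim_y: u32 BE | kind: u8 = 1 ]
+/// ```
+///
+/// while a compressed SVG is just the lz4-compressed text followed by `kind: u8 = 0`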
+pub struct StableImageBytes(Vec<u8>);
+
+impl StableImageBytes {
+    const COMPRESSED_SVG_KIND: u8 = 0;
+    const PRE_DECODED_KIND: u8 = 1;
+    // 1 (scale bool) + 8 (2 u32s for dimensions)
+    const PRE_DECODED_FOOTER_LEN: usize = 9;
+
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+}
+
+#[derive(Clone, Debug)]
+pub enum StableImageConvertError {
+    MissingKind,
+    InvalidKind(u8),
+    MissingPreDecodedFooter,
+    InvalidPreDecodedScale(u8),
+}
+
+impl fmt::Display for StableImageConvertError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::MissingKind => f.write_str("Missing stable image kind"),
+            Self::InvalidKind(kind) => write!(f, "Invalid stable image kind: {kind}"),
+            Self::MissingPreDecodedFooter => f.write_str("Missing pre-decoded image footer"),
+            Self::InvalidPreDecodedScale(scale) => write!(f, "Invalid pre-decoded scale: {scale}"),
+        }
+    }
+}
+
+impl std::error::Error for StableImageConvertError {}
+
+impl TryFrom<StableImageBytes> for StableImage {
+    type Error = ConvertError;
+
+    fn try_from(bytes: StableImageBytes) -> Result<Self, Self::Error> {
+        let mut bytes = bytes.0;
+        let kind = bytes.pop().ok_or(StableImageConvertError::MissingKind)?;
+        match kind {
+            StableImageBytes::COMPRESSED_SVG_KIND => Ok(Self::CompressedSvg(bytes)),
+            StableImageBytes::PRE_DECODED_KIND => {
+                let footer_start = bytes
+                    .len()
+                    .checked_sub(StableImageBytes::PRE_DECODED_FOOTER_LEN)
+                    .ok_or(StableImageConvertError::MissingPreDecodedFooter)?;
+                let (dim_x, dim_y, scale) = {
+                    let mut footer = bytes.drain(footer_start..);
+                    let scale = match footer.next().expect("Length pre-checked") {
+                        0 => false,
+                        1 => true,
+                        unknown => {
+                            return Err(
+                                StableImageConvertError::InvalidPreDecodedScale(unknown).into()
+                            );
+                        }
+                    };
+                    let dim_x = array::from_fn(|_| footer.next().expect("Length pre-checked"));
+                    let dim_y = array::from_fn(|_| footer.next().expect("Length pre-checked"));
+                    let dim_x = u32::from_be_bytes(dim_x);
+                    let dim_y = u32::from_be_bytes(dim_y);
+                    (dim_x, dim_y, scale)
+                };
+                let image_data = ImageData {
+                    lz4_blob: bytes.into(),
+                    scale,
+                    dimensions: (dim_x, dim_y),
+                };
+                Ok(Self::PreDecoded(image_data))
+            }
+            unknown => Err(StableImageConvertError::InvalidKind(unknown).into()),
+        }
+    }
+}
+
+impl From<StableImage> for StableImageBytes {
+    fn from(data: StableImage) -> Self {
+        match data {
+            StableImage::PreDecoded(ImageData {
+                lz4_blob,
+                scale,
+                dimensions: (dim_x, dim_y),
+            }) => {
+                let mut bytes = lz4_blob.to_vec();
+                bytes.reserve_exact(Self::PRE_DECODED_FOOTER_LEN + 1);
+                bytes.push(scale.into());
+                bytes.extend_from_slice(&dim_x.to_be_bytes());
+                bytes.extend_from_slice(&dim_y.to_be_bytes());
+                bytes.push(Self::PRE_DECODED_KIND);
+                Self(bytes)
+            }
+            StableImage::CompressedSvg(mut bytes) => {
+                bytes.reserve_exact(1);
+                bytes.push(Self::COMPRESSED_SVG_KIND);
+                Self(bytes)
+            }
+        }
+    }
+}
+
+impl ToSql for StableImageBytes {
+    fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
+        self.0.to_sql()
+    }
+}
+
+impl FromSql for StableImageBytes {
+    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
+        let blob = value.as_blob()?;
+        Ok(Self(blob.to_vec()))
+    }
+}
+
+// TODO: roundtrip prop-test some of ^^. Could try fuzzing with `divan` too since we shouldn't
+// have to split out a separate library using that
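+
+// A minimal sketch of such a roundtrip test for the SVG kind; the pre-decoded kind needs a real
+// `ImageData` to construct, so it's left for the prop-test
+#[cfg(test)]
+mod roundtrip {
+    use super::*;
+
+    #[test]
+    fn svg_survives_encoding() {
+        let image = StableImage::CompressedSvg(vec![1, 2, 3]);
+        let bytes = StableImageBytes::from(image);
+        match StableImage::try_from(bytes).unwrap() {
+            StableImage::CompressedSvg(svg) => assert_eq!(svg, [1, 2, 3]),
+            other => panic!("kind changed in roundtrip: {other:?}"),
+        }
+    }
+}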
diff --git a/src/image/cache/mod.rs b/src/image/cache/mod.rs
new file mode 100644
index 00000000..8cd3fd8e
--- /dev/null
+++ b/src/image/cache/mod.rs
@@ -0,0 +1,558 @@
+//! Contains our image caching logic
+//!
+//! The current cache is a 2-layered cache consisting of a volatile per-session cache along with a
+//! persistent per-user cache
+//!
+//! # Image source
+//!
+//! `inlyne` can load images from either local files stored on the user's computer, or from images
+//! requested from remote URLs
+//!
+//! ## Local Images (from files)
+//!
+//! Local images are handled exclusively by the per-session cache since there's no point in taking
+//! space from remote images, which are much more important in terms of caching
+//!
+//! Validity is determined by storing and comparing the local file's last modified time, where an
+//! entry is valid if the last modified time is an exact match
+//!
+//! ## Remote Images (from URLs)
+//!
+//! Remote images are stored in all layers of the cache
+//!
+//! Validity is determined according to the rules codified in the `http-cache-semantics` crate,
+//! which depend on both the request and response headers. Our actions are determined by the
+//! results of the `.before_request()` and `.after_response()` hooks
+//!
+//! # Cache Layers
+//!
+//! Like typical layered caches, entries are retrieved by going down the layers, pulling the
+//! entries up through all of the levels when updating
+//!
+//! ## L1 - Volatile Per-Session Cache
+//!
+//! The per-session cache provides 2 key functions:
+//!
+//! 1. A fast lookup to avoid reaching out to the global database on every request
+//!     - Reloading the page should not re-pull all of the images from the database
+//!     - The slowest aspects of checking this cache are either waiting for writers on the
+//!       `RwLock`s or stating the local file to get its last modified time
+//! 2. The ability to make cheap copies of image data
+//!     - The bulk of the data is stored in `Arc<_>`s which are cheap to copy
+//!
+//! ## L2 - Persistent Per-User Cache
+//!
+//! The persistent per-user cache functions as a typical private HTTP cache. This affords most of
+//! the typical benefits of an HTTP cache, e.g. avoiding making requests on fresh content,
+//! avoiding re-transferring bodies on matching E-Tags, etc.
+//!
+//! # Garbage Collection
+//!
+//! Entries are evicted based on both a global size limit and a global time-to-live (TTL).
+//! Constraining along both of these allows the cache to behave well for both very active and
+//! inactive users. Active users can sit at the size limit, assuming they look at enough images
+//! often enough to fully saturate the cache, while inactive users wind up with a smaller cache,
+//! as only the entries that are within the global TTL will be retained
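+//!
+//! A rough sketch of the intended read path through the layers (`promote` and `fetch` here are
+//! hypothetical helpers, not APIs defined in this module):
+//!
+//! ```ignore
+//! if let Some(image) = per_session.get(&key) {
+//!     return Ok(image); // L1 hit: cheap `Arc` clone, no database involved
+//! }
+//! match global.check_remote_cache(&key, SystemTime::now())? {
+//!     // L2 hit: pull the entry up into the per-session cache and serve it
+//!     CacheCheck::Fresh((_policy, image)) => promote(image),
+//!     // Stale entry or miss: revalidate/fetch over HTTP, `insert` the result, then promote
+//!     CacheCheck::Cont(cont) => fetch(cont),
+//! }
+//! ```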
+
+use std::{
+    fmt,
+    io::{self, Read},
+    path::PathBuf,
+    sync::Arc,
+    time::{Instant, SystemTime},
+};
+
+use crate::{
+    image::{ImageBuffer, ImageData},
+    HistTag,
+};
+
+use http_cache_semantics::{AfterResponse, CachePolicy, RequestLike};
+use lz4_flex::frame::{FrameDecoder, FrameEncoder};
+use metrics::histogram;
+use resvg::{tiny_skia, usvg};
+use serde::{Deserialize, Serialize};
+use url::Url;
+
+mod global;
+// TODO: this shouldn't be pub
+pub mod request;
+mod session;
+#[cfg(test)]
+mod tests;
+
+pub use global::{
+    run_garbage_collector as run_global_garbage_collector, Stats as GlobalStats,
+    StatsInner as GlobalStatsInner,
+};
+use request::StandardRequest;
+
+// TODO: spawn a cache worker when creating the cache and return a handle that can communicate
+// with it? Each request can be pushed to a thread-pool that shares the cache?
+
+const MAX_CACHE_SIZE_BYTES: u64 = 256 * 1_024 * 1_024;
+
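+// Try to decode the bytes as a raster image first and only fall back to treating them as SVG
+// text when that fails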
+fn load_image(bytes: &[u8]) -> anyhow::Result<StableImage> {
+    let image = if let Ok(image) = ImageData::load(&bytes, true) {
+        image.into()
+    } else {
+        // TODO: how to verify that this is an svg?
+        let svg = std::str::from_utf8(bytes)?;
+        StableImage::from_svg(&svg)
+    };
+    Ok(image)
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Key {
+    Remote(RemoteKey),
+    Local(PathBuf),
+}
+
+// Internally stores a URL, but we keep it as a string to simplify DB storage and comparisons
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)]
+pub struct RemoteKey(String);
+
+impl fmt::Display for RemoteKey {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&self.0)
+    }
+}
+
+impl RemoteKey {
+    pub fn new_unchecked<I: Into<String>>(s: I) -> Self {
+        Self(s.into())
+    }
+
+    pub fn get(&self) -> &str {
+        &self.0
+    }
+}
+
+impl From<RemoteKey> for Key {
+    fn from(key: RemoteKey) -> Self {
+        Self::Remote(key)
+    }
+}
+
+impl From<&RemoteKey> for Key {
+    fn from(key: &RemoteKey) -> Self {
+        key.to_owned().into()
+    }
+}
+
+impl From<Url> for RemoteKey {
+    fn from(url: Url) -> Self {
+        Self(url.to_string())
+    }
+}
+
+impl From<RemoteKey> for ureq::Request {
+    fn from(key: RemoteKey) -> Self {
+        let req: StandardRequest = (&key).into();
+        (&req).into()
+    }
+}
+
+impl Key {
+    fn from_abs_path(path: PathBuf) -> Option<Self> {
+        if path.is_absolute() {
+            Some(Self::Local(path))
+        } else {
+            None
+        }
+    }
+
+    fn from_url(url: &str) -> anyhow::Result<Self> {
+        let url = Url::parse(url)?;
+        Ok(url.into())
+    }
+}
+
+impl From<Url> for Key {
+    fn from(url: Url) -> Self {
+        if url.scheme() == "file" {
+            let path = url.to_file_path().unwrap();
+            Self::from_abs_path(path).expect("URLs are _always_ absolute paths")
+        } else {
+            Self::Remote(url.into())
+        }
+    }
+}
+
+impl From<&Key> for Key {
+    fn from(key_ref: &Key) -> Self {
+        key_ref.to_owned()
+    }
+}
+
+#[derive(Clone, Debug)]
+pub enum StableImage {
+    /// Pre-baked image data ready to be served
+    PreDecoded(ImageData),
+    /// Compressed SVG text
+    ///
+    /// SVGs get stored as the original text and rendered on demand instead of being pre-rendered
+    /// because the rendering for the same SVG can change depending on different dpi or font info.
+    /// This will likely be smaller anyway
+    CompressedSvg(Vec<u8>),
+}
+
+impl StableImage {
+    pub fn from_svg(svg: &str) -> Self {
+        let mut input = io::Cursor::new(svg.as_bytes());
+        // TODO: upstream a helper function that does this
+        let mut compressor = FrameEncoder::new(Vec::new());
+        io::copy(&mut input, &mut compressor).expect("in-memory I/O failed");
+        let output = compressor.finish().unwrap();
+        Self::CompressedSvg(output)
+    }
+
+    pub fn render(self, ctx: &SvgContext) -> ImageResult<ImageData> {
+        match self {
+            Self::PreDecoded(data) => Ok(data),
+            Self::CompressedSvg(compressed) => {
+                let mut svg_bytes = Vec::with_capacity(compressed.len());
+                let mut decompressor = FrameDecoder::new(io::Cursor::new(compressed));
+                decompressor
+                    .read_to_end(&mut svg_bytes)
+                    .map_err(|_| ImageError::SvgDecompressionError)?;
+
+                let opt = usvg::Options::default();
+                // TODO: loading the fontdb on every single SVG render is gonna be slow
+                let mut fontdb = usvg::fontdb::Database::new();
+                fontdb.load_system_fonts();
+                let mut tree = usvg::Tree::from_data(&svg_bytes, &opt)?;
+                // TODO: need to check and see if someone can pass a negative dpi and see what
+                // kind of issues it can cause
+                tree.size = tree.size.scale_to(
+                    tiny_skia::Size::from_wh(
+                        tree.size.width() * ctx.dpi,
+                        tree.size.height() * ctx.dpi,
+                    )
+                    .ok_or(ImageError::SvgInvalidDimensions)?,
+                );
+                tree.postprocess(Default::default(), &fontdb);
+                let mut pixmap =
+                    tiny_skia::Pixmap::new(tree.size.width() as u32, tree.size.height() as u32)
+                        .ok_or(ImageError::SvgInvalidDimensions)?;
+                resvg::render(&tree, tiny_skia::Transform::default(), &mut pixmap.as_mut());
+                let image_buffer =
+                    ImageBuffer::from_raw(pixmap.width(), pixmap.height(), pixmap.data().into())
+                        .ok_or(ImageError::SvgContainerTooSmall)?;
+                Ok(ImageData::new(image_buffer, false))
+            }
+        }
+    }
+}
+
+impl From<ImageData> for StableImage {
+    fn from(data: ImageData) -> Self {
+        Self::PreDecoded(data)
+    }
+}
+
+pub trait TimeSource: 'static {
+    fn now(&self) -> SystemTime;
+}
+
+struct SystemTimeSource;
+
+impl TimeSource for SystemTimeSource {
+    fn now(&self) -> SystemTime {
+        SystemTime::now()
+    }
+}
+
+// TODO: ban typical way of constructing to force usage of vv
+/// Our custom `CacheOptions` (could be `const`)
+fn cache_options() -> http_cache_semantics::CacheOptions {
+    // TODO: PR upstream for `const fn new() -> CacheOptions`
+    http_cache_semantics::CacheOptions {
+        // Our cache is per-user aka private
+        shared: false,
+        ..Default::default()
+    }
+}
+
+pub struct Shared {
+    per_session: session::Cache,
+    time: Box<dyn TimeSource>,
+    svg_ctx: SvgContext,
+}
+
+#[derive(Clone)]
+pub struct SvgContext {
+    dpi: f32,
+}
+
+impl Default for SvgContext {
+    fn default() -> Self {
+        Self { dpi: 1.0 }
+    }
+}
+
+// TODO: restructure how a lot of this is done. Allow for checking the l1 cache without touching a
+// db connection, and allow for either a pool of actual workers or an `Arc<Mutex<_>>` for
+// a shareable in-memory db
+#[derive(Clone)]
+pub struct LayeredCache(Arc<Shared>);
+
+impl LayeredCache {
+    pub fn new(svg_ctx: SvgContext) -> anyhow::Result<Self> {
+        Ok(Self::init(SystemTimeSource, svg_ctx))
+    }
+
+    #[cfg(test)]
+    pub fn new_with_time<T>(time: T, svg_ctx: SvgContext) -> anyhow::Result<Self>
+    where
+        T: TimeSource,
+    {
+        Ok(Self::init(time, svg_ctx))
+    }
+
+    fn init