diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..bff29e6 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["--cfg", "tokio_unstable"] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eaf35eb..cd450a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,6 +21,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Install `yasm` + run: sudo apt-get install yasm -y + - name: Set up Rust toolchain uses: actions-rs/toolchain@v1 with: @@ -72,14 +75,4 @@ jobs: run: cargo nextest run - name: Run doctests - run: cargo test --doc - - - - test_semver: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check semver - uses: obi1kenobi/cargo-semver-checks-action@v2 \ No newline at end of file + run: cargo test --doc \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4be9a54..dd95a57 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ /target -raves_media_info.db/ -raves_thumbnails.db/ +**.db +**.sqlite Cargo.lock _BACKUP_BEFORE_MERGING_MEDIA_AND_RECORD/ build/ \ No newline at end of file diff --git a/.sqlx/query-300f24b05e9d9ea142221747d442b64157f80010ebf865415387fe304176631b.json b/.sqlx/query-300f24b05e9d9ea142221747d442b64157f80010ebf865415387fe304176631b.json new file mode 100644 index 0000000..214fa4b --- /dev/null +++ b/.sqlx/query-300f24b05e9d9ea142221747d442b64157f80010ebf865415387fe304176631b.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n INSERT INTO hashes (media_id, hash) \n VALUES ($1, $2) \n ON CONFLICT(media_id)\n DO UPDATE SET\n hash = excluded.hash;\n ", + "describe": { + "columns": [], + "parameters": { + "Right": 2 + }, + "nullable": [] + }, + "hash": "300f24b05e9d9ea142221747d442b64157f80010ebf865415387fe304176631b" +} diff --git a/.sqlx/query-5c65472bfefa4824073f4462a7822f19360f7eed979566d7e7aeea4b54b4c3cd.json b/.sqlx/query-5c65472bfefa4824073f4462a7822f19360f7eed979566d7e7aeea4b54b4c3cd.json new file mode 100644 index 0000000..c62f7e8 --- /dev/null +++ b/.sqlx/query-5c65472bfefa4824073f4462a7822f19360f7eed979566d7e7aeea4b54b4c3cd.json @@ -0,0 +1,26 @@ +{ + "db_name": "SQLite", + "query": "SELECT\n media_id as `media_id: Uuid`,\n hash\n FROM hashes\n WHERE media_id = $1", + "describe": { + "columns": [ + { + "name": "media_id: Uuid", + "ordinal": 0, + "type_info": "Text" + }, + { + "name": "hash", + "ordinal": 1, + "type_info": "Blob" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false, + false + ] + }, + "hash": "5c65472bfefa4824073f4462a7822f19360f7eed979566d7e7aeea4b54b4c3cd" +} diff --git a/.sqlx/query-b684f6f8d49f6605881a61d83f0174ebac2e8a7063cff012f36708b418b4061d.json b/.sqlx/query-b684f6f8d49f6605881a61d83f0174ebac2e8a7063cff012f36708b418b4061d.json new file mode 100644 index 0000000..2c6bb22 --- /dev/null +++ b/.sqlx/query-b684f6f8d49f6605881a61d83f0174ebac2e8a7063cff012f36708b418b4061d.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n INSERT INTO info \n (id, path, filesize, format, creation_date, modification_date, first_seen_date, width_px, height_px, specific_metadata, other_metadata, tags)\n VALUES\n ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)\n ON CONFLICT(id)\n DO UPDATE SET\n path = excluded.path,\n filesize = excluded.filesize,\n format = excluded.format,\n creation_date = excluded.creation_date,\n width_px = excluded.width_px,\n height_px = excluded.height_px,\n specific_metadata = excluded.specific_metadata,\n other_metadata = 
excluded.other_metadata,\n tags = excluded.tags;\n ", "describe": { "columns": [], "parameters": { "Right": 12 }, "nullable": [] }, "hash": "b684f6f8d49f6605881a61d83f0174ebac2e8a7063cff012f36708b418b4061d" } diff --git a/Cargo.toml b/Cargo.toml index 5f44a7f..67e1dbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,13 @@ image = "0.25.2" infer = "0.16.0" mp4parse = "0.17.0" matroska = "0.27.0" +camino = { version = "1.1.9", features = ["serde", "serde1"] } +sqlx = { version = "0.8.3", features = ["sqlite", "chrono", "json", "macros", "uuid", "runtime-tokio"] } +chrono = { version = "0.4.39", features = ["serde"] } +constcat = "0.5.1" +uuid = { version = "1.12.0", features = ["v4", "serde"] } +avif-parse = "1.3.2" +blake3 = { version = "1.5.5", features = ["mmap", "rayon"] } # crc32fast = "1.4.2" # sys dependencies @@ -51,15 +58,10 @@ branch = "master" # features = ["build", "static"] features = ["build"] - -[dependencies.surrealdb] -package = "surrealdb-nightly" -version = "2.0.20240903" -default-features = true -features = ["kv-surrealkv"] - [dev-dependencies] async-ctrlc = "1.2.0" clap = { version = "4.5.17", features = ["derive"] } tracing-subscriber = "0.3.18" dirs = "5.0.1" +anyhow = "1.0.95" +console-subscriber = "0.4.1" diff --git a/README.md b/README.md index 8ddd6e2..5cade51 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,9 @@ Under active development. - If we go the route of having People (i.e. machine learning), we should be able to associate folks with their tags. - If a person is named "Barrett", allow users to associate them with the "barrett" tag (or any other). - for UI: warn on low overlap. + - [ ] Recommended people tags + - When a user knows someone is often associated with certain tags, they can add tags that show up by default. + - ex: Dad has "family", "home", "overweight", etc. - [ ] Search - You should be able to search the database for virtually anything. - [ ] Cleanup @@ -50,4 +53,15 @@ Under active development. - Locked media should only have some attributes locked, if even necessary at all. (i.e. the queue isn't running multiple things at once) - How does this affect search/navigation? +## Usage + +You'll want to do four things when using this library in the app: + +1. Set up logging to see the library's messages. (`tracing_subscriber`) +2. Make or load a configuration for the library. (`config::CONFIG`) +3. Use `database::DB_FOLDER_PATH.set()` to say where the database is (or will be) located. +4. Start the file watcher with `Watch::watch()`. + +It's important that these tasks are performed **before** using the library. Otherwise, the backend will not be correctly initialized, and bugs may result.
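To make that order concrete, here is a minimal sketch of an app's startup, pieced together from this diff's own examples (`examples/thumbnail.rs` and the deleted `examples/media.rs`). The watched path and the `BugReportInfo` values are placeholders, and the exact `Watch::watch()` signature should be checked against `src/watch.rs` before use:

```rust
use backdrop::{
    config::{BugReportInfo, Config},
    database,
};
use camino::Utf8PathBuf;

#[tokio::main]
async fn main() {
    // 1. logging first, so the library's messages are visible
    tracing_subscriber::fmt().init();

    // 2. make (or load) a configuration; these values are illustrative
    let data_dir = Utf8PathBuf::try_from(dirs::data_dir().unwrap()).unwrap();
    let cache_dir = Utf8PathBuf::try_from(dirs::cache_dir().unwrap()).unwrap();
    Config::init_config(
        &[Utf8PathBuf::from("/path/to/your/photos")], // placeholder watched path
        data_dir.clone(),
        cache_dir,
        BugReportInfo {
            app_name: "my_app".to_string(),
            app_version: "0.1.0".to_string(),
            device: "desktop".to_string(),
            display: "unknown".to_string(),
            target_triple: "x86_64-unknown-linux-gnu".to_string(),
            commit: "unknown".to_string(),
            repo: "https://example.com/my_app".to_string(),
            build_time: "unknown".to_string(),
        },
    )
    .await;

    // 3. point the backend at the *folder* that holds (or will hold) the database
    database::DB_FOLDER_PATH
        .set(data_dir)
        .expect("DB_FOLDER_PATH should only be set once");

    // 4. only now start the watcher (and the rest of the backend), e.g.
    //    `backdrop::watch::Watch::watch(...)`.
}
```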
+ diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..d506869 --- /dev/null +++ b/build.rs @@ -0,0 +1,5 @@ +// generated by `sqlx migrate build-script` +fn main() { + // trigger recompilation when a new migration is added + println!("cargo:rerun-if-changed=migrations"); +} diff --git a/examples/checking.rs b/examples/checking.rs deleted file mode 100644 index ac26202..0000000 --- a/examples/checking.rs +++ /dev/null @@ -1,73 +0,0 @@ -use serde::{Deserialize, Serialize}; -use surrealdb::engine::local::SurrealKV; -use surrealdb::sql::Thing; -use surrealdb::Surreal; - -#[derive(Debug, Serialize)] -struct Name<'a> { - first: &'a str, - last: &'a str, -} - -#[derive(Debug, Serialize)] -struct Person<'a> { - title: &'a str, - name: Name<'a>, - marketing: bool, -} - -#[derive(Debug, Serialize)] -struct Responsibility { - marketing: bool, -} - -#[derive(Debug, Deserialize)] -struct Record { - #[allow(dead_code)] - id: Thing, -} - -#[tokio::main] -async fn main() -> surrealdb::Result<()> { - let database_path = "raves_info.db"; - - // Create database connection - let db = Surreal::new::<SurrealKV>(database_path).await?; - - // Select a specific namespace / database - db.use_ns("test").use_db("test").await?; - - // Create a new person with a random id - let created: Option<Record> = db - .create("person") - .content(Person { - title: "Founder & CEO", - name: Name { - first: "Tobidone", - last: "Morgan Hitchcock", - }, - marketing: true, - }) - .await?; - dbg!(created); - - // Update a person record with a specific id - let updated: Option<Record> = db - .update(("person", "jaime")) - .merge(Responsibility { marketing: true }) - .await?; - dbg!(updated); - - // Select all people records - let people: Vec<Record> = db.select("person").await?; - dbg!(people); - - // Perform a custom advanced query - let groups = db - .query("SELECT marketing, count() FROM type::table($table) GROUP BY marketing") - .bind(("table", "person")) - .await?; - dbg!(groups); - - Ok(()) -} diff --git a/examples/kamadak_dump_exif.rs b/examples/kamadak_dump_exif.rs new file mode 100644 index 0000000..0f42721 --- /dev/null +++ b/examples/kamadak_dump_exif.rs @@ -0,0 +1,29 @@ +use std::env; +use std::fs::File; +use std::io::BufReader; +use std::path::{Path, PathBuf}; + +fn main() { + for path in env::args_os().skip(1).map(PathBuf::from) { + dump_file(&path); + } +} + +fn dump_file(path: &Path) { + let file = File::open(path).unwrap(); + + // To parse with continue-on-error mode: + let exif = kamadak_exif::Reader::new() + .read_from_container(&mut BufReader::new(&file)) + .unwrap(); + println!("{}", path.display()); + for f in exif.fields() { + println!( + " {}/{}: {}", + f.ifd_num.index(), + f.tag, + f.display_value().with_unit(&exif) + ); + println!(" {:?}", f.value); + } +} diff --git a/examples/media.rs b/examples/media.rs deleted file mode 100644 index 0717aa3..0000000 --- a/examples/media.rs +++ /dev/null @@ -1,78 +0,0 @@ -//! Use this to test the different types of media. - -use std::path::PathBuf; - -use tokio::{sync::RwLock, time::sleep}; - -use backdrop::{ - config::{BugReportInfo, Config}, - models::media::Media, -}; -use tracing::Level; - -// note: this can be a file or folder with MANY media files -const MEDIA_FILE_PATH: &str = "/home/barrett/Pictures/CalyxOS Backup Main/DCIM/Snapchat"; - -#[tokio::main] -async fn main() { - tracing_subscriber::fmt::fmt() - .pretty() - .with_max_level(Level::DEBUG) - .init(); - - // let's start the watch with the given paths. - // then we can see what the database looks like afterwards!
- let conf = Config::new( - vec![PathBuf::from(MEDIA_FILE_PATH)], - dirs::data_dir().unwrap().join("backdrop_media_example"), - dirs::cache_dir().unwrap().join("backdrop_media_example"), - BugReportInfo { - app_name: "backdrop_media_example".to_string(), - app_version: "0.1.0".to_string(), - device: "desktop".to_string(), - display: "lineage_and_some_other_stuff".to_string(), - target_triple: "x86_64-farts-gnu".to_string(), - commit: "unknown".to_string(), - repo: "https://github.com/onkoe/backdrop".to_string(), - build_time: "unknown".to_string(), - }, - ); - - tokio::select! { - _ = wait_and_start_watcher(conf) => {}, - _ = forever_loop_and_watch_db() => {}, - _ = async_ctrlc::CtrlC::new().expect("ctrlc handler should just work") => {}, - } -} - -async fn forever_loop_and_watch_db() { - let db = backdrop::database::RavesDb::connect().await.unwrap(); - - loop { - let v_result: Result<Vec<Media>, surrealdb::Error> = db.media_info.select("info").await; - - let Ok(v) = v_result else { - tracing::info!("empty db..."); - sleep(std::time::Duration::from_secs(10)).await; - - tracing::info!("fetching new db info..."); - continue; - }; - - tracing::info!("here are all paths in the database: \n"); - for (i, m) in v.iter().enumerate() { - println!("media {i}: {}", m.metadata.path.display()); - } - tracing::info!("database: {:?}", v); - tracing::info!("..."); - sleep(std::time::Duration::from_secs(10)).await; - tracing::info!("fetching new db info..."); - } -} - -async fn wait_and_start_watcher(conf: Config) { - tracing::debug!("HEY: waiting to start watcher..."); - sleep(std::time::Duration::from_secs(10)).await; - tracing::debug!("watcher will now begin!"); - backdrop::watch::Watch::watch(RwLock::new(conf)).await -} diff --git a/examples/query.rs b/examples/query.rs index 8efb413..6798f73 100644 --- a/examples/query.rs +++ b/examples/query.rs @@ -1,16 +1,21 @@ //! Let's see if we can chain query method calls...
-use std::ffi::OsString; - use backdrop::{ - database::RavesDb, - models::{media::Media, metadata::types::Orientation}, + database::DATABASE, + models::media::{ + metadata::{Orientation, Resolution}, + Media, + }, }; +use camino::Utf8PathBuf; #[tokio::main] async fn main() { - let db = RavesDb::connect().await.unwrap(); - let all: Vec<Media> = db.media_info.select("info").await.unwrap(); + let mut conn = DATABASE.acquire().await.expect("db conn"); + let all = sqlx::query_as::<_, Media>("SELECT * FROM info") + .fetch_all(&mut *conn) + .await + .unwrap(); // SEARCH: all media where: // - orientation is portrait, @@ -20,20 +25,18 @@ async fn main() { .iter() .filter(|m| { matches!( - Orientation::from(m.metadata.resolution.clone()), + Orientation::from(Resolution::new(m.width_px, m.height_px)), Orientation::Portrait ) }) .filter(|m| { - m.metadata - .path + Utf8PathBuf::from(&m.path) .file_name() - .unwrap_or(OsString::new().as_os_str()) - .to_string_lossy() + .unwrap_or_default() .to_string() .contains(('0'..='9').collect::<Vec<char>>().as_slice()) }) - .filter(|m| m.metadata.resolution.width > 1920 && m.metadata.resolution.height > 1080) + .filter(|m| m.width_px > 1920 && m.height_px > 1080) .collect::<Vec<_>>(); println!("found results: {:#?}", executed_search); diff --git a/examples/thumbnail.rs b/examples/thumbnail.rs index 8442c4b..e6235cb 100644 --- a/examples/thumbnail.rs +++ b/examples/thumbnail.rs @@ -2,10 +2,10 @@ use backdrop::{ config::{BugReportInfo, Config}, - database::RavesDb, + database::DATABASE, models::media::Media, }; -use surrealdb::RecordId; +use camino::Utf8PathBuf; use tracing::Level; #[tokio::main] @@ -16,8 +16,8 @@ async fn main() { Config::init_config( &[], - dirs::data_dir().unwrap(), - dirs::cache_dir().unwrap(), + Utf8PathBuf::try_from(dirs::data_dir().unwrap()).unwrap(), + Utf8PathBuf::try_from(dirs::cache_dir().unwrap()).unwrap(), BugReportInfo { app_name: "backdrop_media_example".to_string(), app_version: "0.1.0".to_string(), @@ -31,17 +31,14 @@ async fn main() { ) .await; - let db = RavesDb::connect().await.unwrap(); - let mut one = db.media_info.query("SELECT * FROM info").await.unwrap(); - - let m: Vec<Media> = one.take("media").unwrap(); - let id: Vec<RecordId> = one.take("id").unwrap(); - - let media = m.first().unwrap(); - let id = id.first().unwrap(); + let mut conn = DATABASE.acquire().await.expect("db connection"); + let media = sqlx::query_as::<_, Media>("SELECT * FROM info LIMIT 1") + .fetch_one(&mut *conn) + .await + .unwrap(); // create a thumbnail for it - let thumbnail = media.get_thumbnail(id).await.unwrap(); + let thumbnail = media.get_thumbnail(&media.id).await.unwrap(); thumbnail.create().await.unwrap(); println!("result should be at path: {:#?}", thumbnail.path_str()); diff --git a/migrations/0001_tables.sql b/migrations/0001_tables.sql new file mode 100644 index 0000000..ed17fbd --- /dev/null +++ b/migrations/0001_tables.sql @@ -0,0 +1,25 @@ +-- uncomment to delete old data +-- DROP TABLE info; +-- DROP TABLE thumbnail; +-- +CREATE TABLE info( + id TEXT NOT NULL PRIMARY KEY, + -- note: this would preferably be unique, but that messes with modern sqlite + -- update-insert syntax...
+ path TEXT NOT NULL, + filesize INTEGER NOT NULL, + format TEXT NOT NULL, + creation_date DATETIME, + modification_date DATETIME, + first_seen_date DATETIME NOT NULL, + width_px INTEGER NOT NULL, + height_px INTEGER NOT NULL, + specific_metadata TEXT NOT NULL, + other_metadata TEXT, + tags TEXT NOT NULL +); + +CREATE TABLE thumbnail( + path TEXT NOT NULL, + image_id TEXT NOT NULL PRIMARY KEY +); \ No newline at end of file diff --git a/migrations/0002_tables.sql b/migrations/0002_tables.sql new file mode 100644 index 0000000..a18e1bb --- /dev/null +++ b/migrations/0002_tables.sql @@ -0,0 +1,48 @@ +-- media info: cached metadata about tracked media files +CREATE TABLE IF NOT EXISTS info( + id TEXT NOT NULL PRIMARY KEY, + -- note: this would preferably be unique, but that messes with modern sqlite + -- update-insert syntax... + path TEXT NOT NULL, + filesize INTEGER NOT NULL, + format TEXT NOT NULL, + creation_date DATETIME, + modification_date DATETIME, + first_seen_date DATETIME NOT NULL, + width_px INTEGER NOT NULL, + height_px INTEGER NOT NULL, + specific_metadata TEXT NOT NULL, + other_metadata TEXT, + tags TEXT NOT NULL +); + +-- thumbnails: preview media +CREATE TABLE IF NOT EXISTS thumbnail( + -- path to the thumbnail on disk + path TEXT NOT NULL, + -- thumbnail is for the media file with this uuid + -- + -- TODO: migrate to `media_id` + image_id TEXT NOT NULL PRIMARY KEY +); + +-- albums: contain media +CREATE TABLE IF NOT EXISTS album( + id TEXT NOT NULL PRIMARY KEY, + name TEXT NOT NULL, + path TEXT NOT NULL, + -- + -- uuids (in json) + contained_media TEXT NOT NULL +); + +-- hashes: media file hashes to ensure metadata is up-to-date! +CREATE TABLE IF NOT EXISTS hashes( + media_id TEXT NOT NULL PRIMARY KEY, + hash BLOB NOT NULL +); + +-- hash_blob_index: tell SQLite to make a btree for the hashes, too. +-- +-- (this allows for high-speed lookups, both ways. hash <=> id) +CREATE UNIQUE INDEX IF NOT EXISTS hash_blob_index ON hashes(hash); \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index 91c99db..a48d5c0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,5 +1,6 @@ -use std::{path::PathBuf, sync::OnceLock}; +use std::sync::OnceLock; +use camino::Utf8PathBuf; use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use crate::error::{bug_msg, ConfigError}; @@ -13,13 +14,13 @@ pub static CONFIG: OnceLock<Config> = OnceLock::new(); #[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)] pub struct Config { /// The paths that we'll be watching for new files. - pub watched_paths: Vec<PathBuf>, + pub watched_paths: Vec<Utf8PathBuf>, /// Path to the app's data directory. - pub data_dir: PathBuf, + pub data_dir: Utf8PathBuf, /// Path to the app's cache directory. - pub cache_dir: PathBuf, + pub cache_dir: Utf8PathBuf, /// Information for automatically reporting bugs. pub bug_report_info: BugReportInfo, @@ -27,9 +28,9 @@ pub struct Config { impl Config { pub fn new( - watched_paths: Vec<PathBuf>, - data_dir: PathBuf, - cache_dir: PathBuf, + watched_paths: Vec<Utf8PathBuf>, + data_dir: Utf8PathBuf, + cache_dir: Utf8PathBuf, bug_report_info: BugReportInfo, ) -> Self { Self { @@ -43,7 +44,7 @@ impl Config { /// Attempts to read a previous `Config` from disk. /// /// Note that this may fail across versions, requiring new configs.
- pub async fn from_disk(data_dir: PathBuf) -> Result<Self, ConfigError> { + pub async fn from_disk(data_dir: Utf8PathBuf) -> Result<Self, ConfigError> { // read the config from disk let s = tokio::fs::read_to_string(data_dir.join("shared_prefs/config.toml")) .await @@ -67,10 +68,11 @@ impl Config { /// Use this EXACTLY ONCE to initialize the config. /// /// The app should be the only one calling this. + #[tracing::instrument] pub async fn init_config( - watched_paths: &[PathBuf], - data_dir: PathBuf, - cache_dir: PathBuf, + watched_paths: &[Utf8PathBuf], + data_dir: Utf8PathBuf, + cache_dir: Utf8PathBuf, bug_report_info: BugReportInfo, ) { if CONFIG.get().is_none() { diff --git a/src/database.rs b/src/database.rs new file mode 100644 index 0000000..ee8748d --- /dev/null +++ b/src/database.rs @@ -0,0 +1,80 @@ +//! Helps to connect to the database. + +use std::{ + str::FromStr, + sync::{LazyLock, OnceLock}, +}; + +use camino::Utf8PathBuf; +use sqlx::{ + query::Query, + sqlite::{SqliteArguments, SqliteConnectOptions}, + Pool, Sqlite, +}; + +pub const HASHES_TABLE: &str = "hashes"; +pub const INFO_TABLE: &str = "info"; +pub const THUMBNAILS_TABLE: &str = "thumbnail"; + +/// A path to the folder containing the backend's database. +/// +/// DO NOT set this to the database file - it will fail to initialize. +pub static DB_FOLDER_PATH: OnceLock<Utf8PathBuf> = OnceLock::new(); + +/// The database pool. +/// +/// You MUST set the [`DB_FOLDER_PATH`] before attempting to access this. +/// Otherwise, the backend will panic! +pub static DATABASE: LazyLock<Pool<Sqlite>> = LazyLock::new(|| { + const RAVES_DB_FILE: &str = "raves.sqlite"; + + // try to get the folder path (hoping the user has set the OnceLock static) + let Some(raves_db_folder) = DB_FOLDER_PATH.get() else { + tracing::error!("Attempted to access the database before initializing the path!"); + tracing::error!("Since we don't know where the database is, the backend will now panic."); + panic!("No database folder path given."); + }; + + // ensure the path exists + match raves_db_folder.try_exists() { + Ok(true) => (), + Ok(false) => { + tracing::error!("The given database folder does not exist!"); + panic!("Database folder doesn't exist."); + } + Err(e) => { + tracing::error!("Failed to check if database folder exists! err: {e}"); + tracing::warn!("This might be because of file permissions."); + panic!("Couldn't check if database folder exists. err: {e}"); + } + } + + let options = + SqliteConnectOptions::from_str(&format!("sqlite://{raves_db_folder}/{RAVES_DB_FILE}")) + .inspect_err(|e| { + tracing::error!( + "Failed to parse database string. The provided path may be incorrect. err: {e}" + ) + }) + .expect("database opts str") + .create_if_missing(true); + + // connect to the pool + let pool = sqlx::Pool::<Sqlite>::connect_lazy_with(options); + // we'll also run migrations here real quick + _ = futures::executor::block_on(sqlx::migrate!("./migrations").run(&pool)).inspect_err(|e| { + tracing::error!( + "Database connection succeeded, but migrating the database failed! err: {e}" + ) + }); + + pool +}); + +pub trait InsertIntoTable { + /// This function provides the query that we'll execute to insert this type + /// into the table defined above. + /// + /// This only constructs a query - it does not execute it! + fn make_insertion_query(&self) -> Query<'_, Sqlite, SqliteArguments<'_>>; +} diff --git a/src/database/mod.rs b/src/database/mod.rs deleted file mode 100644 index 75155fc..0000000 --- a/src/database/mod.rs +++ /dev/null @@ -1,37 +0,0 @@ -//! Helps to connect to the database.
- -use surrealdb::{ - engine::local::{Db, SurrealKV}, - Surreal, -}; - -use crate::error::DatabaseError; - -pub struct RavesDb { - pub media_info: Surreal<Db>, - pub thumbnails: Surreal<Db>, -} - -impl RavesDb { - pub const INFO_TABLE: &str = "info"; - - /// Attempts to connect to the database according to the constants. - pub async fn connect() -> Result<Self, DatabaseError> { - const MEDIA_INFO_PATH: &str = "raves_media_info.db"; - const THUMBNAIL_PATH: &str = "raves_thumbnails.db"; - - // create database connections - let (media_info, thumbnails) = tokio::try_join! { - Surreal::new::<SurrealKV>(MEDIA_INFO_PATH), - Surreal::new::<SurrealKV>(THUMBNAIL_PATH) - }?; - - media_info.use_ns("raves").await?; - media_info.use_db("media").await?; - - Ok(Self { - media_info, - thumbnails, - }) - } -} diff --git a/src/error.rs b/src/error.rs index d08bab0..5faabaf 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,5 @@ -use async_watcher::error; +// use async_watcher::error; +use camino::Utf8PathBuf; use core::error::Error; use pisserror::Error; @@ -16,19 +17,39 @@ pub async fn bug_msg() -> String { #[derive(Debug, Error)] pub enum RavesError { + // + // other errors + // #[error("The database has encountered an error. See: `{_0}`")] DatabaseError(#[from] DatabaseError), + #[error("Error while computing hash. See: {_0}")] + HashError(#[from] HashError), + + #[error("An error occurred when processing media thumbnail data. See: `{_0}`")] + MediaThumbnail(#[from] ThumbnailError), + + // + // etc errors + // #[error("The media file at `{path}` was expected to exist, but didn't.")] MediaDoesntExist { path: String }, + #[error("Failed to open file at {path}. See: {error}")] + FailedToOpenMediaFile { + path: Utf8PathBuf, + error: std::io::Error, + }, + #[error("The media file at `{path}` does not appear to contain MIME (file type) data.")] NoMimeData { path: String }, #[error("The media file at `{path}` is not a supported media file.")] FileNotSupportedMedia { path: String }, + // // metadata + // #[error("The media file at `{_0}` was missing required metadata: {_1}")] FileMissingMetadata(String, String), @@ -47,12 +68,12 @@ pub enum RavesError { #[error("An error occurred when parsing the Matroska video at `{_0}`. See: `{_1}`.")] MatroskaError(String, matroska::Error), + #[error("Failed to parse AVIF metadata. See: {_0}")] + AvifParseError(#[from] avif_parse::Error), + #[error("Failed to get file metadata for the media file at `{path}`. Err: `{err}`.")] FileMetadataFailure { path: String, err: std::io::Error }, - #[error("An error occurred when processing media thumbnail data. See: `{_0}`")] - MediaThumbnail(#[from] ThumbnailError), - #[error("A `tokio` task unexpectedly panicked. See: `{_0}`")] TokioJoinError(#[from] tokio::task::JoinError), @@ -75,7 +96,7 @@ pub enum RavesError { #[derive(Debug, Error)] pub enum DatabaseError { #[error("General database error. See: {_0}")] - GeneralDatabaseError(#[from] surrealdb::Error), + GeneralDatabaseError(#[from] sqlx::Error), #[error("Failed to connect to the database. See: {_0}")] ConnectionError(String), @@ -84,12 +105,18 @@ pub enum DatabaseError { InsertionFailed(String), #[error("Failed to complete database query. See: {_0}")] - QueryFailed(surrealdb::Error), + QueryFailed(sqlx::Error), #[error("Empty response when attempting to query database. 
Path: `{_0}`")] EmptyResponse(String), } +impl From for RavesError { + fn from(value: sqlx::Error) -> Self { + RavesError::DatabaseError(DatabaseError::GeneralDatabaseError(value)) + } +} + #[derive(Debug, Error)] pub enum ConfigError { /// during fs read from disk @@ -124,3 +151,13 @@ pub enum ThumbnailError { #[error("FFmpeg never found a good thumbnail for the video at path `{_0}`.")] FfmpegNoSelectedFilter(String), } + +/// An error that occurred while hashing. +#[derive(Debug, Error)] +pub enum HashError { + #[error("Failed to access the database. err: {_0}")] + DatabaseAccess(#[from] sqlx::Error), + + #[error("Failed to read file at `{_0}`. err: {_1}")] + FileReadFailure(Utf8PathBuf, std::io::Error), +} diff --git a/src/lib.rs b/src/lib.rs index 015a293..df8762d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,8 +21,8 @@ To build this, there are a few dependencies you need to install. I use Fedora, b Under active development. - [ ] GOAL: Feature-completeness - - [ ] Metadata scanning for `Media` - - [ ] Images + - [x] Metadata scanning for `Media` + - [x] Images - [ ] GIFS - [ ] Video - [ ] General (including Folder. i.e. `stat`) @@ -38,6 +38,9 @@ Under active development. - If we go the route of having People (i.e. machine learning), we should be able to associate folks with their tags. - If a person is named "Barrett", allow users to associate them with the "barrett" tag (or any other). - for UI: warn on low overlap. + - [ ] Recommended people tags + - When they know someone is often involved with other tags, a user can add tags to show up by default. + - ex: Dad has "family", "home", "overweight", etc. - [ ] Search - You should be able to search the database for virtually anything. - [ ] Cleanup @@ -47,6 +50,17 @@ Under active development. - Issueify: Implement a "queue" of operations to perform on the data. Create `Future`s for each operation and lock affected media from operations until they are no longer used. - Locked media should only have some attributes locked, if even necessary at all. (i.e. the queue isn't running multiple things at once) - How does this affect search/navigation? + +## Usage + +You'll want to do three things when using this library in the app: + +1. Setup logging to see the library's messages. (`tracing_subscriber`) +1. Make or load a configuration for the library. (`config::CONFIG`) +2. Use `database::DB_FOLDER_PATH.set()` to say where the database is (or will be) located. +3. Start the file watcher with `Watch::watch()`. + +It's important that these tasks are performed **before** using the library. Otherwise, the backend will not be correctly initialized, and bugs may result. */ pub mod config; @@ -55,18 +69,3 @@ pub mod error; pub mod models; pub mod search; pub mod watch; - -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} diff --git a/src/models/media/builder/avif.rs b/src/models/media/builder/avif.rs new file mode 100644 index 0000000..94ef6ba --- /dev/null +++ b/src/models/media/builder/avif.rs @@ -0,0 +1,62 @@ +use avif_parse::read_avif; +use camino::Utf8Path; +use sqlx::types::Json; + +use crate::{ + error::RavesError, + models::media::metadata::{MediaKind, SpecificMetadata}, +}; + +use super::MediaBuilder; + +impl MediaBuilder { + /// Applies EXIF data from `image` to `self`. 
+ #[tracing::instrument(skip(self, path))] + pub(super) async fn apply_avif( + &mut self, + path: impl AsRef<Utf8Path>, + media_kind: MediaKind, + ) -> Result<(), RavesError> { + tracing::debug!("Parsing media file metadata with `avif-parse`..."); + + // cast path + let path = path.as_ref(); + + // grab data from avif. + // + // note: this spawns a blocking task, which tokio is chill with. + // i hold hope for a newfangled async api + let avif_path = path.to_path_buf(); + let avif_data = tokio::task::spawn_blocking(move || parse_avif(&avif_path)).await??; + let useful_metadata = avif_data.primary_item_metadata()?; + + // resolution + self.width_px = Some(useful_metadata.max_frame_width.get()); + self.height_px = Some(useful_metadata.max_frame_height.get()); + tracing::debug!("got resolution from `avif-parse`!"); + + // specific + self.specific_metadata = match media_kind { + MediaKind::Photo => Some(Json(SpecificMetadata::Image {})), + MediaKind::Video => { + tracing::warn!("AVIF parser should not be given video data."); + self.specific_metadata.take() + } + MediaKind::AnimatedPhoto => unimplemented!(), + }; + + Ok(()) + } +} + +/// Attempts to parse the given file as AVIF. +fn parse_avif(path: &Utf8Path) -> Result<avif_parse::AvifData, RavesError> { + let mut file = std::fs::File::open(path) + .inspect_err(|e| tracing::warn!("Failed to open AVIF file for `avif-parse`. err: {e}")) + .map_err(|_| RavesError::MediaDoesntExist { + path: path.to_string(), + })?; + + Ok(read_avif(&mut file) + .inspect_err(|e| tracing::warn!("`avif-parse` failed to read the given file. err: {e}"))?) +} diff --git a/src/models/media/builder/generic.rs b/src/models/media/builder/generic.rs new file mode 100644 index 0000000..d05352b --- /dev/null +++ b/src/models/media/builder/generic.rs @@ -0,0 +1,29 @@ +use std::os::unix::fs::MetadataExt as _; + +use camino::Utf8Path; + +use crate::error::RavesError; + +use super::MediaBuilder; + +impl MediaBuilder { + /// Adds typical file attributes to `self`. + #[tracing::instrument(skip(self))] + pub(super) async fn file(&mut self, path: &Utf8Path) -> Result<(), RavesError> { + let path_str = path.to_string(); + + // err if the file doesn't open + let metadata = tokio::fs::metadata(path) + .await + .inspect_err(|e| tracing::warn!("Failed to open file for metadata. err: {e}")) + .map_err(|_e| RavesError::MediaDoesntExist { path: path_str })?; + tracing::debug!("got file metadata!"); + + self.filesize = Some(metadata.size() as i64); + self.creation_date = metadata.created().ok().map(|st| st.into()); + self.modification_date = metadata.modified().ok().map(|st| st.into()); + tracing::debug!("added file metadata to builder!"); + + Ok(()) + } +} diff --git a/src/models/media/builder/image_crate.rs b/src/models/media/builder/image_crate.rs new file mode 100644 index 0000000..34421b7 --- /dev/null +++ b/src/models/media/builder/image_crate.rs @@ -0,0 +1,42 @@ +//! A builder for basic metadata using the `image` crate. +//! +//! Note that this is effectively a fallback for when there is no other metadata +//! available. + +use camino::Utf8Path; +use image::GenericImageView as _; +use sqlx::types::Json; + +use crate::{ + error::RavesError, + models::media::metadata::{MediaKind, SpecificMetadata}, +}; + +use super::MediaBuilder; + +impl MediaBuilder { + /// Applies basic metadata from the `image` crate to `self`.
+ #[tracing::instrument(skip(self))] + pub(super) async fn apply_image( + &mut self, + path: &Utf8Path, + media_kind: MediaKind, + ) -> Result<(), RavesError> { + // read the image into a buffer and grab its dimensions + let img = image::open(path).map_err(|e| RavesError::ImageError(path.to_string(), e))?; + let (width, height) = img.dimensions(); + tracing::debug!("got image dimensions from image crate: {width}x{height}"); + + // resolution + self.width_px = Some(width); + self.height_px = Some(height); + tracing::debug!("got resolution from image!"); + + // specific + if media_kind == MediaKind::Photo { + self.specific_metadata = Some(Json(SpecificMetadata::Image {})) + } + + Ok(()) + } +} diff --git a/src/models/media/builder/kamadak.rs b/src/models/media/builder/kamadak.rs new file mode 100644 index 0000000..e1c91eb --- /dev/null +++ b/src/models/media/builder/kamadak.rs @@ -0,0 +1,108 @@ +use camino::Utf8Path; +use kamadak_exif::{Exif as KamadakExif, In, Tag}; +use sqlx::types::Json; + +use crate::{ + error::RavesError, + models::media::metadata::{MediaKind, OtherMetadataMap, OtherMetadataValue, SpecificMetadata}, +}; + +use super::MediaBuilder; + +impl MediaBuilder { + /// Applies EXIF data from `kamadak_exif` to `self`. + #[tracing::instrument(skip(self))] + pub(super) async fn apply_kamadak_exif( + &mut self, + path: &Utf8Path, + media_kind: MediaKind, + ) -> Result<(), RavesError> { + let exif = look(path).await?; + tracing::debug!("got exif data from kamadak-exif!"); + + let p = In::PRIMARY; + let err = |msg: &str| { + tracing::error!("Error while building metadata with `kamadak-exif`. err: {msg}"); + RavesError::FileMissingMetadata(path.to_string(), msg.to_string()) + }; + tracing::debug!("looking for exif data..."); + + // resolution + let kamadak_exif::Value::Long(ref w) = exif + .get_field(Tag::PixelXDimension, p) + .ok_or(err("no width"))? + .value + else { + return Err(err("no width")); + }; + let kamadak_exif::Value::Long(ref h) = exif + .get_field(Tag::PixelYDimension, p) + .ok_or(err("no height"))? + .value + else { + return Err(err("no height")); + }; + + self.width_px = Some(*w.first().ok_or(err("no width"))?); + self.height_px = Some(*h.first().ok_or(err("no height"))?); + tracing::debug!("got resolution from exif!"); + + // specific + if media_kind == MediaKind::Photo { + self.specific_metadata = Some(Json(SpecificMetadata::Image {})); + tracing::debug!("got specific metadata from exif!"); + } + + // other + let mut mapped = OtherMetadataMap::new(); + for field in exif.fields() { + let key = field.tag.to_string(); + let value = OtherMetadataValue { + user_facing_name: Some(key.clone()), + value: field.display_value().to_string(), + }; + + mapped.0.insert(key, value); + } + self.other_metadata = Some(Json(mapped)); + tracing::debug!("got other metadata from exif!"); + + tracing::debug!("finished looking for exif data!"); + + Ok(()) + } +} + +/// We use this function to 'look' at the metadata of the file, returning EXIF +/// information from `kamadak_exif`. +/// +/// This is `async` as we use `tokio` to grab a file handle, then spawn a task +/// to process it synchronously, awaiting its completion. +async fn look(path: &Utf8Path) -> Result<KamadakExif, RavesError> { + let path = path.to_path_buf(); // extends lifetime by copying data + + // grab the file with tokio (avoid blocking) + let file = tokio::fs::File::open(path.to_path_buf()) + .await + .inspect_err(|e| tracing::warn!("Failed to open file for `kamadak_exif`! 
err: {e}")) + .map_err(|e| RavesError::FileMetadataFailure { + path: path.clone().into(), + err: e, + })? + .into_std() + .await; + + // make a buffer where we'll read the file + let mut buf_reader = std::io::BufReader::new(file); + let exif_reader = kamadak_exif::Reader::new(); + + // hand that off to `tokio` + tokio::task::spawn_blocking(move || -> Result { + exif_reader + .read_from_container(&mut buf_reader) + .inspect_err(|e| tracing::warn!("`kamadak-exif` failed to get metadata. err: {e}")) + .map_err(|e| RavesError::KamadakExifError(path.to_string(), e)) + }) + .await + .map_err(RavesError::TokioJoinError)? +} diff --git a/src/models/metadata/builder/matroska.rs b/src/models/media/builder/matroska.rs similarity index 65% rename from src/models/metadata/builder/matroska.rs rename to src/models/media/builder/matroska.rs index 994277f..452f75c 100644 --- a/src/models/metadata/builder/matroska.rs +++ b/src/models/media/builder/matroska.rs @@ -3,44 +3,41 @@ //! Note that this is effectively a fallback for when there is no other metadata //! available. -use std::path::Path; - use crate::{ error::RavesError, - models::metadata::{ - types::{Format, MediaKind, Resolution}, - SpecificMetadata, - }, + models::media::metadata::{MediaKind, SpecificMetadata}, }; +use camino::Utf8Path; use matroska::Settings; +use sqlx::types::Json; -use super::MetadataBuilder; +use super::MediaBuilder; -impl MetadataBuilder { +impl MediaBuilder { /// Applies Matroska data from `matroska` to `self`. #[tracing::instrument(skip(self))] pub(super) async fn apply_matroska( &mut self, - path: &Path, - format: Format, + path: &Utf8Path, + media_kind: MediaKind, ) -> Result<(), RavesError> { - if format.media_kind() == MediaKind::Video { - let mkv = matroska::open(path) - .map_err(|e| RavesError::MatroskaError(path.display().to_string(), e))?; + if media_kind == MediaKind::Video { + let mkv = + matroska::open(path).map_err(|e| RavesError::MatroskaError(path.to_string(), e))?; let vt = mkv .video_tracks() .next() .ok_or(RavesError::FileMissingMetadata( - path.display().to_string(), + path.to_string(), "no video track".into(), ))?; // resolution if let Settings::Video(v) = &vt.settings { - self.resolution = - Some(Resolution::new(v.pixel_width as u32, v.pixel_height as u32)); + self.width_px = Some(v.pixel_width as u32); + self.height_px = Some(v.pixel_height as u32); tracing::debug!( "got resolution from matroska: width {}, height {}", v.pixel_width, @@ -50,9 +47,9 @@ impl MetadataBuilder { // specific if let Some(duration) = mkv.info.duration { - self.specific = Some(SpecificMetadata::Video { + self.specific_metadata = Some(Json(SpecificMetadata::Video { length: duration.as_secs_f64(), - }); + })); tracing::debug!( "got video duration from matroska: length {}", duration.as_secs_f64() diff --git a/src/models/media/builder/mod.rs b/src/models/media/builder/mod.rs new file mode 100644 index 0000000..2957217 --- /dev/null +++ b/src/models/media/builder/mod.rs @@ -0,0 +1,428 @@ +//! # Metadata Builders +//! +//! Assists in ascertaining metadata of different kinds in one format. +//! +//! Note that this should eventually be replaced with a fleshed-out library +//! with full support for all these types! 
+ +pub mod avif; +pub mod generic; +pub mod image_crate; +pub mod kamadak; +pub mod matroska; +pub mod mp4parse; +pub mod nom; + +use camino::Utf8Path; +use chrono::{DateTime, Utc}; +use sqlx::types::Json; +use uuid::Uuid; + +use crate::{ + database::{DATABASE, HASHES_TABLE, INFO_TABLE}, + error::RavesError, + models::{ + media::{metadata::MediaKind, Media}, + tags::Tag, + }, +}; + +use super::{ + hash::MediaHash, + metadata::{Format, OtherMetadataMap, SpecificMetadata}, +}; + +/// A media file's metadata. Common metadata is always present, while the `other` +/// field represents that which isn't standard in a dictionary (string, string) +/// form. +#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)] +pub struct MediaBuilder { + /// How large the file is, in bytes. + pub filesize: Option<i64>, + + /// The MIME type (format) of the file. + pub format: Option<Json<Format>>, + + /// The time the file was created, according to the file system. + /// + /// This could be inaccurate or missing depending on the file's source. + pub creation_date: Option<DateTime<Utc>>, + + /// The time the file was last modified, according to the file system. + /// + /// Might be inaccurate or missing. + pub modification_date: Option<DateTime<Utc>>, + + /// The media's width (horizontal) in pixels. + pub width_px: Option<u32>, + + /// The media's height (vertical) in pixels. + pub height_px: Option<u32>, + + /// Additional metadata that's specific to the media's kind, such as a + /// video's framerate. + pub specific_metadata: Option<Json<SpecificMetadata>>, + + /// Metadata that isn't immensely common, but can be read as a string. + /// + /// Or, in other words, it's a hashmap of data. + /// + /// This is stored as `Json` for the database. + pub other_metadata: Option<Json<OtherMetadataMap>>, + + /// The tags of a media file. Note that these can come from the file's EXIF + /// metadata or Raves' internals. + pub tags: Json<Vec<Tag>>, +} + +impl MediaBuilder { + /// Constructs a [`Media`] file representation from this [`MediaBuilder`]. + #[tracing::instrument(skip(self))] + pub(super) async fn build<P: AsRef<Utf8Path> + std::fmt::Debug>( + self, + path: P, + ) -> Result<Media, RavesError> { + let path = path.as_ref(); + self.build_internal(path).await + } + + /// The internal 'build' function to create a [`Media`] from [`MediaBuilder`]. + /// This should only be called from [`MediaBuilder::build`]. + /// + /// NOTE: `path` **must** be an absolute path that's been canonicalized. + /// + /// It has the following pipeline: + /// + /// 1. Grab format of `path`. + /// 2. Apply it to self, but match on the contained `MediaKind` to better + /// determine next steps. + /// 3. Based on our MediaKind... + /// - If we're a photo or animated photo, + /// - AVIF only: apply `avif_parse` crate + /// - TIFF/JPEG/HEIF/PNG/WebP: apply `kamadak_exif` crate + /// - anything: apply `image` crate + /// - If we're a video, + /// - MP4/MOV only: apply `nom_exif` crate + /// - MP4 only: apply `mp4parse` crate + /// - MOV/MKV/WebM: apply `matroska` crate + /// 4. Check for a previous cache of the media. + /// 5. If present, steal its UUID and first-seen datetime. + /// 6. Unwrap all fields and stick into a new `Media`. + /// 7. Return it. 
+ #[tracing::instrument(skip(self))] + async fn build_internal(mut self, path: &Utf8Path) -> Result<Media, RavesError> { + // grab format and apply it to self + let format = format(path).await?; + let mime_type = format.mime_type(); + let media_kind = format.media_kind(); + self.format = Some(Json(format)); + + // grab file metadata real quick + _ = self + .file(path) + .await + .inspect_err(|e| tracing::warn!("Failed to get file metadata! err: {e}")); + + // based on the 'kind' of media we're dealing with, we'll choose different + // libraries to apply to internal metadata + match &media_kind { + MediaKind::Photo | MediaKind::AnimatedPhoto => { + // if we're avif, apply the avif crate + let avif_result = if mime_type.to_lowercase().contains("avif") { + self.apply_avif(path, media_kind) + .await + .map_err(|e| tracing::warn!("Failed to parse with `avif_parse`. err: {e}")) + } else { + Err(()) + }; + + // really this is only for tiff/jpeg/heif/png/webp, but we can + // parse everything since there's a lot of other not-well-known + // file types between all those + let kamadak_result = self + .apply_kamadak_exif(path, media_kind) + .await + .map_err(|e| tracing::debug!("Failed to parse with `kamadak_exif`. err: {e}")); + + // finally, use the `image` crate when we're out of luck :p + if avif_result.is_err() && kamadak_result.is_err() { + _ = self + .apply_image(path, media_kind) + .await + .map_err(|e| tracing::error!("Failed to parse with `image`! err: {e}")); + } + } + + MediaKind::Video => { + // ffmpeg: get video length + let specific_metadata = get_video_len(path) + .inspect_err(|e| tracing::error!("Failed to get video length. err: {e}"))?; + + self.specific_metadata = Some(Json(specific_metadata)); + + // apply `mp4parse` + _ = self.apply_mp4parse(path, media_kind).await.map_err(|e| { + tracing::debug!("Failed to parse with `mp4parse`. err: {e}"); + }); + + // apply `matroska` + _ = self.apply_matroska(path, media_kind).await.map_err(|e| { + tracing::debug!("Failed to parse with `matroska`. err: {e}"); + }); + + // apply `nom_exif` + _ = self.apply_nom_exif(path, media_kind).await; + } + } + + // grab the static fields + let StaticFields { + id, + first_seen_date, + } = get_static_fields(path).await?; + + Ok(Media { + id, + + path: path.to_string(), + filesize: self.filesize.ok_or(RavesError::FileMissingMetadata( + path.to_string(), + "no file size given".into(), + ))?, + creation_date: self.creation_date, + modification_date: self.modification_date, + + format: self.format.ok_or(RavesError::FileMissingMetadata( + path.to_string(), + "no format given".into(), + ))?, + width_px: self.width_px.ok_or(RavesError::FileMissingMetadata( + path.to_string(), + "no width (res) given".into(), + ))?, + height_px: self.height_px.ok_or(RavesError::FileMissingMetadata( + path.to_string(), + "no height (res) given".into(), + ))?, + specific_metadata: self + .specific_metadata + .ok_or(RavesError::FileMissingMetadata( + path.to_string(), + "no specific metadata (file kind variant)".into(), + ))?, + other_metadata: self.other_metadata, + + first_seen_date, + + tags: self.tags, + }) + } +} + +/// Grabs the format of the media file at `path`. +#[tracing::instrument] +async fn format(path: &Utf8Path) -> Result<Format, RavesError> { + let path_str = path.to_string(); + + // infer the MIME type for the file + tracing::debug!("Grabbing MIME type..."); + let mime = infer::get_from_path(path) + .map_err(|_e| RavesError::MediaDoesntExist { + path: path_str.clone(), + })?
.ok_or(RavesError::NoMimeData { + path: path_str.clone(), + })?; + + // make the format + tracing::debug!("Creating format from MIME..."); + let format = Format::new_from_mime(mime.mime_type()) + .ok_or(RavesError::FileNotSupportedMedia { + path: path_str.clone(), + }) + .inspect_err(|e| tracing::error!("Failed to create MIME type! err: {e}"))?; + + Ok(format) +} + +/// Either steals or creates the static fields required to create a [`Media`]. +#[tracing::instrument] +async fn get_static_fields(path: &Utf8Path) -> Result<StaticFields, RavesError> { + // if the media was previously saved in the database, we'll need to use + // its id and 'first seen date' + let (id, first_seen_date) = 'a: { + let mut conn = DATABASE.acquire().await.inspect_err(|e| { + tracing::error!("Failed to connect to database in metadata builder! err: {e}") + })?; + + // if we find our path in there, we can just use the old stuff + let old_media_path_query = + sqlx::query_as::<_, Media>(&format!("SELECT * FROM {INFO_TABLE} WHERE path = $1")) + .bind(path.to_string()) + .fetch_optional(&mut *conn) + .await + .inspect_err(|e| tracing::error!("(path) Failed to query database! err: {e}"))?; + + if let Some(old_media) = old_media_path_query { + break 'a (old_media.id, old_media.first_seen_date); + } + + // we can also check for duplicate photos, as that's fair game for + // 'first seen', though we'll also need to create a new UUID. + if let Ok(hash) = MediaHash::hash_file(path).await { + // (join through the hashes table so the row still deserializes as `Media`) + let old_media_hash_query = sqlx::query_as::<_, Media>(&format!( + "SELECT {INFO_TABLE}.* FROM {INFO_TABLE} JOIN {HASHES_TABLE} ON {INFO_TABLE}.id = {HASHES_TABLE}.media_id WHERE {HASHES_TABLE}.hash = $1" + )) + .bind(hash.as_bytes().to_vec()) + .fetch_optional(&mut *conn) + .await + .inspect_err(|e| tracing::error!("(hash) Failed to query database! err: {e}"))?; + + if let Some(old_media) = old_media_hash_query { + break 'a (Uuid::new_v4(), old_media.first_seen_date); + } + } + + (Uuid::new_v4(), Utc::now()) + }; + + Ok(StaticFields { + id, + first_seen_date, + }) +} + +/// Fields that don't change across metadata generations. +struct StaticFields { + id: Uuid, + first_seen_date: DateTime<Utc>, +} + +impl Default for MediaBuilder { + fn default() -> Self { + Self { + filesize: None, + format: None, + creation_date: None, + modification_date: None, + width_px: None, + height_px: None, + specific_metadata: None, + other_metadata: None, + tags: Json(vec![]), + } + } +} + +/// Grabs the video length of a media file using FFmpeg. +pub fn get_video_len(path: &Utf8Path) -> Result<SpecificMetadata, RavesError> { + let path_str = path.to_string(); + + // let's ask ffmpeg what it thinks + tracing::trace!("video detected. 
asking ffmpeg to handle..."); + ffmpeg_next::init()?; + let t = ffmpeg_next::format::input(path).map_err(|e| RavesError::FfmpegFailedProcessing { + path: path_str.clone(), + err: e.to_string(), + })?; + + // grab the first video stream and see how long it is + let video_length = t + .streams() + .find(|s| s.parameters().medium() == ffmpeg_next::media::Type::Video) + .map(|s| (ffmpeg_next::Rational::new(s.duration() as i32, 1)) * s.time_base()) + .map(|s| s.0 as f64 / s.1 as f64) + .unwrap_or(0_f64); + tracing::trace!("video len is {video_length}."); + + Ok(SpecificMetadata::Video { + length: video_length, + }) +} + +#[cfg(test)] +mod tests { + use std::env::temp_dir; + + use camino::Utf8PathBuf; + use chrono::{DateTime, Utc}; + use sqlx::types::Json; + use uuid::Uuid; + + use crate::{ + database::{self, InsertIntoTable as _, DATABASE, INFO_TABLE}, + models::media::{ + metadata::{Format, SpecificMetadata}, + Media, + }, + }; + + use super::MediaBuilder; + + /// The `MediaBuilder` should keep the `id` and `first_seen_date` fields as-is. + #[tokio::test] + async fn media_builder_keeps_static_fields() { + // set up the db + database::DB_FOLDER_PATH + .set(Utf8PathBuf::try_from(temp_dir()).unwrap()) + .unwrap(); + + let path = Utf8PathBuf::from("tests/assets/fear.avif") + .canonicalize_utf8() + .unwrap(); + + // add a fake file to it + let old_media = Media { + id: Uuid::nil(), + path: path.to_string(), + filesize: 0, + format: Json(Format::new_from_mime("image/avif").unwrap()), + creation_date: None, + modification_date: None, + first_seen_date: DateTime::<Utc>::MIN_UTC, + width_px: 32, + height_px: 32, + specific_metadata: Json(SpecificMetadata::Image {}), + other_metadata: None, + tags: Json(vec![]), + }; + + // insert into db + let mut conn = DATABASE.acquire().await.unwrap(); + old_media + .make_insertion_query() + .execute(&mut *conn) + .await + .unwrap(); + + // now run the media builder on a real file... + let new_media = MediaBuilder::default().build(&path).await.unwrap(); + + assert_eq!(old_media.id, new_media.id, "same uuids"); + assert_eq!( + old_media.first_seen_date, new_media.first_seen_date, + "same first seen dates" + ); + + // insert into the database and ensure they're still accurate + new_media + .make_insertion_query() + .execute(&mut *conn) + .await + .unwrap(); + let inserted_new_media = + sqlx::query_as::<_, Media>(&format!("SELECT * FROM {INFO_TABLE} LIMIT 1")) + .fetch_one(&mut *conn) + .await + .unwrap(); + + assert_eq!( + old_media.id, inserted_new_media.id, + "post-insert same uuids" + ); + assert_eq!( + old_media.first_seen_date, inserted_new_media.first_seen_date, + "post-insert same first seen dates" + ); + } +} diff --git a/src/models/metadata/builder/mp4parse.rs b/src/models/media/builder/mp4parse.rs similarity index 65% rename from src/models/metadata/builder/mp4parse.rs rename to src/models/media/builder/mp4parse.rs index ae01323..4206d60 100644 --- a/src/models/metadata/builder/mp4parse.rs +++ b/src/models/media/builder/mp4parse.rs @@ -3,37 +3,35 @@ //! Note that this is effectively a fallback for when there is no other metadata //! available. -use std::path::Path; +use camino::Utf8Path; +use sqlx::types::Json; use crate::{ error::RavesError, - models::metadata::{ - builder::get_video_len, - types::{Format, MediaKind, Resolution}, - }, + models::media::{builder::get_video_len, metadata::MediaKind}, }; -use super::MetadataBuilder; +use super::MediaBuilder; -impl MetadataBuilder { +impl MediaBuilder { /// Applies MP4 metadata from `mp4parse` to `self`. 
#[tracing::instrument(skip(self))] pub(super) async fn apply_mp4parse( &mut self, - path: &Path, - format: Format, + path: &Utf8Path, + media_kind: MediaKind, ) -> Result<(), RavesError> { // check if it's a video - if format.media_kind() == MediaKind::Video { + if media_kind == MediaKind::Video { let mut f = std::fs::File::open(path).map_err(|e| RavesError::FileMetadataFailure { - path: path.display().to_string(), + path: path.to_string(), err: e, })?; // read the file let info = tokio::task::spawn_blocking(move || mp4parse::read_mp4(&mut f)) .await? - .map_err(|e| RavesError::Mp4parseError(path.display().to_string(), e))?; + .map_err(|e| RavesError::Mp4parseError(path.to_string(), e))?; // grab first video track if let Some(track) = info @@ -42,16 +40,21 @@ impl MetadataBuilder { .find(|t| t.track_type == mp4parse::TrackType::Video) { let header = track.tkhd.clone().ok_or(RavesError::FileMissingMetadata( - path.display().to_string(), + path.to_string(), "no track header".into(), ))?; // resolution - self.resolution = Some(Resolution::new(header.width, header.height)); - tracing::debug!("got resolution from mp4parse!"); + self.width_px = Some(header.width); + self.height_px = Some(header.height); + tracing::debug!( + "got resolution from mp4parse! ({} x {})", + header.width, + header.height + ); // specific - self.specific = Some(get_video_len(path)?); + self.specific_metadata = Some(Json(get_video_len(path)?)); // other if let Some(Ok(userdata)) = info.userdata { diff --git a/src/models/metadata/builder/nom.rs b/src/models/media/builder/nom.rs similarity index 75% rename from src/models/metadata/builder/nom.rs rename to src/models/media/builder/nom.rs index 0370c46..0ba3e61 100644 --- a/src/models/metadata/builder/nom.rs +++ b/src/models/media/builder/nom.rs @@ -1,71 +1,65 @@ -use std::path::Path; - +use camino::Utf8Path; use nom_exif::{parse_exif_async, Exif as NomExif, ExifIter, ExifTag}; +use sqlx::types::Json; use tokio::task::spawn_blocking; -use tokio::try_join; use crate::{ error::RavesError, - models::metadata::{ + models::media::{ builder::get_video_len, - types::{Format, MediaKind, Resolution}, - OtherMetadataMap, OtherMetadataValue, SpecificMetadata, + metadata::{MediaKind, OtherMetadataMap, OtherMetadataValue, SpecificMetadata}, }, }; -use super::MetadataBuilder; +use super::MediaBuilder; -impl MetadataBuilder { +impl MediaBuilder { /// Applies EXIF data from `nom_exif` to `self`. #[tracing::instrument(skip(self))] pub(super) async fn apply_nom_exif( &mut self, - path: &Path, - format: Format, + path: &Utf8Path, + media_kind: MediaKind, ) -> Result<(), RavesError> { tracing::debug!("grabbing exif data..."); - let (_, (iter, exif)) = try_join! { - self.file(path), - fut(path), - }?; + let (iter, exif) = fut(path).await?; tracing::debug!("got exif data!"); - let media_kind = format.media_kind(); - // look for cool shit in the exif // res let w = exif .get(ExifTag::ImageWidth) .ok_or(RavesError::FileMissingMetadata( - path.display().to_string(), + path.to_string(), "no width".into(), ))? .as_u32() .ok_or(RavesError::FileMissingMetadata( - path.display().to_string(), + path.to_string(), "no width".into(), ))?; let h = exif .get(ExifTag::ImageHeight) .ok_or(RavesError::FileMissingMetadata( - path.display().to_string(), + path.to_string(), "no height".into(), ))? 
.as_u32() .ok_or(RavesError::FileMissingMetadata( - path.display().to_string(), + path.to_string(), "no height".into(), ))?; - self.resolution = Some(Resolution::new(w, h)); + self.width_px = Some(w); + self.height_px = Some(h); tracing::debug!("got resolution from exif!"); // specific - self.specific = Some(match media_kind { + self.specific_metadata = Some(Json(match media_kind { MediaKind::Photo => SpecificMetadata::Image {}, MediaKind::Video => get_video_len(path)?, MediaKind::AnimatedPhoto => unimplemented!(), - }); + })); tracing::debug!("got specific metadata from exif!"); // other @@ -84,7 +78,7 @@ mapped.0.insert(key, value); } - self.other = Some(Some(mapped)); + self.other_metadata = Some(Json(mapped)); tracing::debug!("got other metadata from exif!"); tracing::debug!("finished looking for exif data!"); @@ -93,8 +87,8 @@ } } -async fn fut(path: &Path) -> Result<(ExifIter, NomExif), RavesError> { - let path_str = path.display().to_string(); +async fn fut(path: &Utf8Path) -> Result<(ExifIter, NomExif), RavesError> { + let path_str = path.to_string(); let file = tokio::fs::File::open(&path) .await diff --git a/src/models/media/hash.rs b/src/models/media/hash.rs new file mode 100644 index 0000000..d091008 --- /dev/null +++ b/src/models/media/hash.rs @@ -0,0 +1,155 @@ +use camino::Utf8Path; +use sqlx::{query::Query, sqlite::SqliteArguments, Sqlite}; +use uuid::Uuid; + +use crate::{ + database::{InsertIntoTable, DATABASE}, + error::{DatabaseError, HashError}, +}; + +use super::Media; + +/// A hash for a media file, stored in the [`HASHES_TABLE`]. +#[derive(Clone, Debug, PartialEq, PartialOrd, Hash, sqlx::FromRow)] +pub struct MediaHash { + /// The media file's UUID. + pub media_id: Uuid, + /// The media file's hash. + pub hash: Vec<u8>, +} + +impl MediaHash { + /// Creates a new [`MediaHash`] from the given required components. + /// + /// This will actually compute the hash of the file. Use struct + /// construction instead if you've already got it. + #[tracing::instrument] + pub async fn new<P: AsRef<Utf8Path> + core::fmt::Debug>( + media_id: Uuid, + path: P, + ) -> Result<Self, HashError> { + let path = path.as_ref(); + + let blake3_hash = Self::hash_file(path).await?; + + Ok(Self { + hash: blake3_hash.as_bytes().into(), + media_id, + }) + } + + /// Hashes the file at the given path. + #[tracing::instrument] + pub async fn hash_file<P: AsRef<Utf8Path> + core::fmt::Debug>( + path: P, + ) -> Result<blake3::Hash, HashError> { + let path = path.as_ref(); + let mut hasher = blake3::Hasher::new(); + + // read the file and get its hash + hasher + .update_mmap_rayon(path) + .inspect_err(|e| tracing::warn!("`blake3` file hashing failed! err: {e}")) + .map_err(|e| HashError::FileReadFailure(path.to_path_buf(), e)) + .map(|hasher| hasher.finalize()) + } + + /// Attempts to add this hash to the [`HASHES_TABLE`]. + #[tracing::instrument] + pub async fn add_to_table(&self) -> Result<(), DatabaseError> { + let mut conn = DATABASE + .acquire() + .await + .inspect_err(|e| tracing::error!("Failed to connect to database. err: {e}")) + .map_err(|e| DatabaseError::ConnectionError(e.to_string()))?; + + self.make_insertion_query() + .execute(&mut *conn) + .await + .inspect_err(|e| tracing::error!("Hash insertion failed! err: {e}")) + .map_err(|e| DatabaseError::InsertionFailed(e.to_string())) + .map(|_query_response| ()) + } +} + +impl InsertIntoTable for MediaHash { + #[tracing::instrument] + fn make_insertion_query(&self) -> Query<'_, Sqlite, SqliteArguments<'_>> { + // NOTE: if changing `HASHES_TABLE`, also change this! 
+
+impl InsertIntoTable for MediaHash {
+    #[tracing::instrument]
+    fn make_insertion_query(&self) -> Query<'_, Sqlite, SqliteArguments<'_>> {
+        // NOTE: if changing `HASHES_TABLE`, also change this!
+        sqlx::query!(
+            r#"
+            INSERT INTO hashes (media_id, hash) 
+            VALUES ($1, $2) 
+            ON CONFLICT(media_id)
+            DO UPDATE SET
+                hash = excluded.hash;
+            "#,
+            self.media_id,
+            self.hash
+        )
+    }
+}
+
+/// Whether a media file's hash is up-to-date.
+#[derive(Clone, Copy, Debug, Hash, PartialEq, PartialOrd)]
+pub enum HashUpToDate {
+    UpToDate,
+    Outdated,
+    NotInDatabase,
+}
+
+impl Media {
+    /// Computes this media file's hash.
+    ///
+    /// It also checks if the media file's hash is up-to-date in the database, but
+    /// DOES NOT update it.
+    ///
+    /// ## Errors
+    ///
+    /// This method can fail if the backing file no longer exists or the
+    /// database connection errors.
+    pub async fn hash(&self) -> Result<(MediaHash, HashUpToDate), HashError> {
+        let mut conn = DATABASE
+            .acquire()
+            .await
+            .inspect_err(|e| tracing::error!("Database connection failed! err: {e}"))?;
+
+        // get old hash
+        let old_hash_query = sqlx::query_as!(
+            MediaHash,
+            r#"SELECT
+                media_id as `media_id: Uuid`,
+                hash
+            FROM hashes
+            WHERE media_id = $1"#,
+            self.id
+        )
+        .fetch_optional(&mut *conn)
+        .await
+        .inspect_err(|e| {
+            tracing::debug!("Didn't find old hash in hashes table. ignored and totally ok err: {e}")
+        });
+
+        // generate new hash
+        let new_hash = MediaHash::new(self.id, &self.path).await?;
+
+        // check if they match.
+        //
+        // if they don't, we'll complain and tell the caller
+        let mut is_up_to_date = HashUpToDate::NotInDatabase;
+        if let Ok(Some(old_hash)) = old_hash_query {
+            if old_hash != new_hash {
+                tracing::debug!(
+                    "Hash mismatch! {:#x?} != {:#x?}",
+                    old_hash.hash,
+                    new_hash.hash
+                );
+                is_up_to_date = HashUpToDate::Outdated;
+            } else {
+                is_up_to_date = HashUpToDate::UpToDate;
+            }
+        }
+
+        Ok((new_hash, is_up_to_date))
+    }
+}
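A sketch of how a caller might consume `Media::hash` and the `HashUpToDate` flag above (hypothetical, assuming an already-loaded `Media` and `anyhow` for brevity):

```rust
use backdrop::models::media::{hash::HashUpToDate, Media};

async fn refresh_hash(media: &Media) -> anyhow::Result<()> {
    // computes the fresh hash; it does NOT write it back by itself
    let (hash, status) = media.hash().await?;

    match status {
        // stored hash still matches the file on disk
        HashUpToDate::UpToDate => {}
        // stale or missing: persist the freshly computed hash
        HashUpToDate::Outdated | HashUpToDate::NotInDatabase => {
            hash.add_to_table().await?;
        }
    }
    Ok(())
}
```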
diff --git a/src/models/media/load.rs b/src/models/media/load.rs
index f3e5939..c42114a 100644
--- a/src/models/media/load.rs
+++ b/src/models/media/load.rs
@@ -1,73 +1,177 @@
-use std::path::{Path, PathBuf};
+use camino::{Utf8Path, Utf8PathBuf};

use crate::{
-    database::RavesDb,
+    database::{InsertIntoTable, DATABASE, INFO_TABLE},
    error::{DatabaseError, RavesError},
-    models::metadata::builder::MetadataBuilder,
+    models::media::{builder::MediaBuilder, hash::MediaHash},
};

use super::Media;

-impl Media {
-    /// Gets a `Media` from disk or cache.
-    #[tracing::instrument]
-    pub async fn new(path: PathBuf) -> Result<Self, RavesError> {
-        let db = RavesDb::connect().await?;

-        // query the db for image
-        let mut results = db
-            .media_info
-            .query("SELECT * FROM info WHERE path = $path")
-            .bind(("path", path.clone()))
-            .await
-            .map_err(DatabaseError::QueryFailed)?;
+/** Loads a media file.
+
+This function is the internal implementation of the [`Media::load`] function.
+It should not be called from any other location.
+
+## Pipeline
+
+This has its own little 'pipeline'. In short:

-        let r: Result<Option<Media>, surrealdb::Error> = results.take(0);

+1. Check that the given path exists on disk.
+2. Canonicalize the path.
+3. Look for an existing database entry with this path.
+4. If an entry exists, hash both and compare:
+    - Hash the new file.
+    - Use the old entry's hash.
+    - Equal: return the old media file from cache.
+    - Unequal: load from disk.
+5. If no entry exists, just load the file from disk.
+*/
+#[tracing::instrument]
+pub(super) async fn load_internal(path: &Utf8Path) -> Result<Media, RavesError> {
+    // check that its path exists
+    ensure_exists(path).await?;

-        if let Ok(Some(media)) = r {
-            // return it here
-            Ok(media)
+    // canonicalize it
+    let path = canonicalize_path(path).await;
+
+    // grab our hash
+    let new_hash = MediaHash::hash_file(&path).await?;
+
+    // if there's an old entry, we'll try to reuse it
+    if let Some(old_entry) = from_database(&path).await? {
+        tracing::trace!("Found an old entry!");
+        let old_entry_hash = MediaHash::hash_file(&old_entry.path).await?;
+
+        // when the hashes match, we'll just return the old media!
+        if old_entry_hash == new_hash {
+            tracing::debug!("Old and new entries had the same hash! Returning early...");
+            return Ok(old_entry);
        } else {
-            // otherwise, make the metadata ourselves
-            Self::load_from_disk(&path).await
+            tracing::trace!("Old and new entries differed! Recomputing metadata!");
        }
+    } else {
+        tracing::trace!("No previous entry was located.");
+    }
+
+    // load from disk + save hash to table
+    tracing::debug!("Generating metadata from disk...");
+    let media = from_disk(&path).await?;
+
+    // save hash to table
+    {
+        // construct hash structure for db
+        let hash = MediaHash {
+            media_id: media.id,
+            hash: new_hash.as_bytes().into(),
+        };
+
+        // insert it
+        hash.add_to_table().await?;
    }

-    /// Loads file (with metadata) from disk... no matter what.
-    #[tracing::instrument]
-    pub async fn load_from_disk(path: &Path) -> Result<Self, RavesError> {
-        // make sure the file exists
-        let path_str = path.display().to_string();
-        if !path.exists() {
-            tracing::error!("the file doesn't exist");
+    // finally, return the media :)
+    Ok(media)
+}
+
+/// Checks to ensure that the media file exists.
+///
+/// ## Errors
+///
+/// Will error if the function can't actually check if the path exists.
+#[tracing::instrument]
+async fn ensure_exists(path: &Utf8Path) -> Result<(), RavesError> {
+    match tokio::fs::try_exists(path).await {
+        Ok(true) => {
+            tracing::trace!("The requested media file exists on disk.");
+            return Ok(());
+        }
+        Ok(false) => {
+            tracing::warn!("File does not exist on disk!");
            return Err(RavesError::MediaDoesntExist {
-                path: path_str.clone(),
+                path: path.to_string(),
+            });
+        }
+        Err(e) => {
+            tracing::error!("Failed to check if file exists on disk! err: {e}");
+            return Err(RavesError::FailedToOpenMediaFile {
+                path: path.to_path_buf(),
+                error: e,
            });
        }
+    }
+}

-        // get metadata
-        tracing::debug!("checking file properties...");
-        let metadata = MetadataBuilder::default().apply(path).await?;
-
-        // ok ok... we have everything else. let's save it now!
-        tracing::debug!("saving media to database...");
-        let db = RavesDb::connect().await?;
-        let v: Vec<Media> = db
-            .media_info
-            .insert("info")
-            .content(Self {
-                metadata,
-                tags: Vec::new(), // TODO
-            })
-            .await
-            .map_err(|e| DatabaseError::InsertionFailed(e.to_string()))?;
+/// Attempts to turn relative paths (`project/my_image.avif`) to absolute ones
+/// with no links (like `/home/barrett/projects/my_image.avif`).
+///
+/// If it fails, it'll just return the original path.
+#[tracing::instrument]
+async fn canonicalize_path(path: &Utf8Path) -> Utf8PathBuf {
+    path.canonicalize_utf8()
+        .inspect_err(|e| tracing::warn!("Failed to canonicalize path. err: {e}"))
+        .unwrap_or_else(|_| path.to_path_buf())
+}

-        let constructed = v
-            .first()
-            .ok_or(DatabaseError::InsertionFailed(
-                "didn't get anything from return vec! :p".into(),
-            ))?
-            .clone();
+/// Attempts to grab a media file with `path` from the database. This returns
+/// an `Option`, as that might not be around.
+///
+/// ## Errors
+///
+/// Might error if the database connection fails.
+#[tracing::instrument]
+async fn from_database(path: &Utf8Path) -> Result<Option<Media>, RavesError> {
+    // grab db connection
+    let mut conn = DATABASE
+        .acquire()
+        .await
+        .inspect_err(|e| tracing::error!("Failed to connect to database! err: {e}"))?;
+
+    // query for an entry with matching path
+    sqlx::query_as::<_, Media>(&format!(
+        "SELECT * FROM {INFO_TABLE} WHERE path = $1 LIMIT 1"
+    ))
+    .bind(path.to_string())
+    .fetch_optional(&mut *conn)
+    .await
+    .inspect_err(|e| {
+        tracing::warn!("Failed to query database for old version of media file! err: {e}")
+    })
+    .map_err(|e| e.into())
+}

-        Ok(constructed)
+/// Grabs metadata from disk using the `MediaBuilder` API. It'll then cache it
+/// into the database.
+///
+/// ## Errors
+///
+/// Might fail if the media file isn't compatible or the database fails to
+/// connect.
+#[tracing::instrument]
+async fn from_disk(path: &Utf8Path) -> Result<Media, RavesError> {
+    // grab the media file metadata
+    tracing::trace!("Feeding media file path to MediaBuilder...");
+    let media = MediaBuilder::default().build(path).await?;
+
+    // cache in database
+    {
+        let mut conn = DATABASE
+            .acquire()
+            .await
+            .inspect_err(|e| tracing::error!("Failed to connect to database! err: {e}"))?;
+
+        media
+            .make_insertion_query()
+            .execute(&mut *conn)
+            .await
+            .inspect_err(|e| {
+                tracing::warn!("Failed to insert media into database. err: {e}, media: {media:#?}");
+            })
+            .map_err(|e| DatabaseError::InsertionFailed(e.to_string()))?;
    }
+
+    // return the media
+    Ok(media)
}
diff --git a/src/models/metadata/types.rs b/src/models/media/metadata.rs
similarity index 83%
rename from src/models/metadata/types.rs
rename to src/models/media/metadata.rs
index e648c0d..7d62d40 100644
--- a/src/models/metadata/types.rs
+++ b/src/models/media/metadata.rs
@@ -1,3 +1,51 @@
+use std::collections::HashMap;
+
+/// Metadata "specific" to one type of media.
+#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
+pub enum SpecificMetadata {
+    #[non_exhaustive]
+    Image {},
+
+    #[non_exhaustive]
+    AnimatedImage {
+        frame_count: u32,
+        framerate: Framerate,
+    },
+
+    #[non_exhaustive]
+    Video { length: f64 },
+}
+
+#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
+pub struct OtherMetadataValue {
+    pub user_facing_name: Option<String>,
+    pub value: String,
+}
+
+/// A representation for uncommon metadata that can only be read.
+///
+/// Also, it's a `HashMap` newtype to get around the lack of `PartialOrd`.
+#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
+pub struct OtherMetadataMap(pub HashMap<String, OtherMetadataValue>);
+
+impl OtherMetadataMap {
+    pub fn new() -> Self {
+        Self(HashMap::new())
+    }
+}
+
+impl Default for OtherMetadataMap {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl PartialOrd for OtherMetadataMap {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.0.len().partial_cmp(&other.0.len())
+    }
+}
+
use std::cmp::Ordering;

use fraction::GenericFraction;
@@ -19,7 +67,9 @@ impl Resolution {
    }
}

/// A simple enum over the supported types of media.
-#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, serde::Serialize, serde::Deserialize)]
+#[derive(
+    Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord, serde::Serialize, serde::Deserialize,
+)]
pub enum MediaKind {
    Photo,
    AnimatedPhoto,
@@ -75,7 +125,7 @@ impl Format {
    }

    pub fn media_kind(&self) -> MediaKind {
-        self.media_kind.clone()
+        self.media_kind
    }

    pub fn mime_type(&self) -> String {
diff --git a/src/models/media/mod.rs b/src/models/media/mod.rs
index 1bf2fea..d67499e 100644
--- a/src/models/media/mod.rs
+++ b/src/models/media/mod.rs
@@ -1,133 +1,126 @@
-use std::path::{Path, PathBuf};
-
-use surrealdb::RecordId;
-
-use crate::{
-    database::RavesDb,
-    error::{DatabaseError, RavesError},
-    models::metadata::Metadata,
+use camino::Utf8Path;
+use chrono::{DateTime, Utc};
+use sqlx::{
+    query::Query,
+    sqlite::SqliteArguments,
+    types::{Json, Uuid},
+    Sqlite,
};

-use super::{metadata::SpecificMetadata, tags::Tag, thumbnail::Thumbnail};
+use super::tags::Tag;
+use crate::{database::InsertIntoTable, error::RavesError};
+use metadata::{Format, OtherMetadataMap, SpecificMetadata};

+mod builder;
+pub mod hash;
pub mod load;
+pub mod metadata;

/// Some media file.
-#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
+#[derive(
+    Clone,
+    Debug,
+    PartialEq,
+    PartialOrd,
+    serde::Serialize,
+    serde::Deserialize,
+    sqlx::FromRow,
+    sqlx::Encode,
+    sqlx::Type,
+)]
pub struct Media {
-    pub metadata: Metadata,
-    //
-    // The identifier of the media. Used for loading cached metadata,
-    // thumbnails, and potentially other information.
-    // pub id: RecordId,
-    //
-    /// The tags of a media file. Note that these can come from the file's EXIF
-    /// metadata or Rave's internals.
-    pub tags: Vec<Tag>,
-}
-
-#[derive(Clone, Debug, serde::Deserialize)]
-pub struct MediaRecord {
-    pub id: RecordId,
-    pub media: Media,
-}
-
-impl Media {
-    /// Updates this file's metadata in the database.
-    pub async fn update_metadata(path: &Path) -> Result<(), RavesError> {
-        // TODO: optimize using CRC32 to check if we need to update?
-        // might require another table..?
+    /// Unique ID identifying which piece of media is represented.
+    ///
+    /// This should match with the thumbnail database.
+    pub id: Uuid,

-        Self::load_from_disk(path).await.map(|_| ())
-    }
-
-    /// Returns the thumbnail from the database for this media file.
-    pub async fn get_thumbnail(&self, _id: &RecordId) -> Result<Thumbnail, RavesError> {
-        // see if we have a thumbnail in the database
-        if let Some(thumbnail) = self.database_get_thumbnail().await? {
-            return Ok(thumbnail);
-        }
-
-        // we haven't cached one yet...
-        // first, let's see if the media file contains one for us to use
-        // TODO: put this back if we use exiv2 again or something
-        // if let Some(raw_thumbnail) = self.gexif2_get_thumbnail().await? {
-        //     // let's save the file first
-        //     let rep = Thumbnail::new(id).await;
-        //     rep.save_from_buffer(&raw_thumbnail, self).await?;
-        // }
-
-        // the file doesn't have one either! let's make one ;D
-        let thumbnail = Thumbnail::new(&self.id().await?).await;
-        thumbnail.create().await?; // this makes the file
-        Ok(thumbnail)
-    }
+    /// The last known file path for this media file.

-    pub fn specific_type(&self) -> SpecificMetadata {
-        self.metadata.specific.clone()
-    }
-}
+    pub path: String,

-// the private impl
-impl Media {
-    /// Grabs the path of this media file.
-    pub(crate) fn path(&self) -> PathBuf {
-        self.metadata.path.clone()
-    }
+    /// How large the file is, in bytes.
+    pub filesize: i64,

-    /// Creates a string from this media file's path.
-    pub(crate) fn path_str(&self) -> String {
-        self.path().display().to_string()
-    }

+    /// The MIME type (format) of the file.
+    pub format: Json<Format>,

-    /// Grabs this media file's unique identifier.
-    async fn id(&self) -> Result<RecordId, RavesError> {
-        let db = RavesDb::connect().await?;
+    /// The time the file was created, according to the file system.
+    ///
+    /// This could be inaccurate or missing depending on the file's source.
+    pub creation_date: Option<DateTime<Utc>>,

-        let mut response = db
-            .media_info
-            .query("SELECT id FROM info WHERE path = $path")
-            .bind(("path", self.path()))
-            .await
-            .map_err(DatabaseError::QueryFailed)?;
+    /// The time the file was last modified, according to the file system.
+    ///
+    /// Might be inaccurate or missing.
+    pub modification_date: Option<DateTime<Utc>>,

-        let maybe: Option<MediaRecord> = response.take(0).map_err(DatabaseError::QueryFailed)?;
+    /// The time the file was first noted by Raves.
+    pub first_seen_date: DateTime<Utc>,

-        maybe
-            .ok_or(DatabaseError::EmptyResponse(self.path_str()))
-            .map(|mr| mr.id)
-    }
+    /// The media's width (horizontal) in pixels.
+    pub width_px: u32,

-    /// Tries to grab the thumbnail from the database, if it's there.
-    async fn database_get_thumbnail(&self) -> Result<Option<Thumbnail>, RavesError> {
-        let (db, id) = tokio::try_join!(RavesDb::connect(), self.id())?;
+    /// The media's height (vertical) in pixels.
+    pub height_px: u32,

-        // grab thumbnail from database
-        let mut response = db
-            .thumbnails
-            .query("SELECT * FROM thumbnail WHERE image_id = $id")
-            .bind(("id", id))
-            .await
-            .map_err(DatabaseError::QueryFailed)?;
+    /// Additional metadata that's specific to the media's kind, such as a
+    /// video's framerate.
+    pub specific_metadata: Json<SpecificMetadata>,

-        let maybe: Option<Thumbnail> = response.take(0).map_err(DatabaseError::QueryFailed)?;
+    /// Metadata that isn't immensely common, but can be read as a string.
+    ///
+    /// Or, in other words, it's a hashmap of data.
+    ///
+    /// This is stored as `Json` for the database.
+    pub other_metadata: Option<Json<OtherMetadataMap>>,

+    /// The tags of a media file. Note that these can come from the file's EXIF
+    /// metadata or Rave's internals.
+    pub tags: Json<Vec<Tag>>,
+}

-        Ok(maybe)
+impl Media {
+    /// Loads a media file's metadata.
+    ///
+    /// This function handles all path, hashing, and caching operations. You
+    /// may safely call it for anything.
+    #[tracing::instrument]
+    pub async fn load<P: AsRef<Utf8Path> + core::fmt::Debug>(path: P) -> Result<Self, RavesError> {
+        load::load_internal(path.as_ref()).await
    }
+}
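A short sketch of the new call-site shape (hypothetical, mirroring the field layout above; `anyhow` is assumed for error handling):

```rust
use backdrop::models::media::Media;

async fn demo() -> anyhow::Result<()> {
    // `load` handles canonicalization, hashing, and db caching internally
    let media = Media::load("tests/assets/fear.avif").await?;

    // plain columns are direct fields now...
    println!("{} is {} bytes", media.path, media.filesize);

    // ...while JSON columns are reached through the `Json` wrapper's `.0`
    println!("kind-specific metadata: {:?}", media.specific_metadata.0);
    Ok(())
}
```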
- // async fn gexif2_get_thumbnail(&self) -> Result>, RavesError> { - // // check the file's properties - // let m = block_in_place(|| { - // rexiv2::Metadata::new_from_path(self.path()).map_err(|_e| { - // RavesError::MediaDoesntExist { - // path: self.path_str(), - // } - // }) - // })?; - - // Ok(m.get_thumbnail().map(|bstr| bstr.to_vec())) - // } +impl InsertIntoTable for Media { + fn make_insertion_query(&self) -> Query<'_, Sqlite, SqliteArguments<'_>> { + sqlx::query!( + r#" + INSERT INTO info + (id, path, filesize, format, creation_date, modification_date, first_seen_date, width_px, height_px, specific_metadata, other_metadata, tags) + VALUES + ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) + ON CONFLICT(id) + DO UPDATE SET + path = excluded.path, + filesize = excluded.filesize, + format = excluded.format, + creation_date = excluded.creation_date, + width_px = excluded.width_px, + height_px = excluded.height_px, + specific_metadata = excluded.specific_metadata, + other_metadata = excluded.other_metadata, + tags = excluded.tags; + "#, + self.id, + self.path, + self.filesize, + self.format, + self.creation_date, + self.modification_date, + self.first_seen_date, + self.width_px, + self.height_px, + self.specific_metadata, + self.other_metadata, + self.tags + ) + } } diff --git a/src/models/metadata/builder/image_crate.rs b/src/models/metadata/builder/image_crate.rs deleted file mode 100644 index 8874924..0000000 --- a/src/models/metadata/builder/image_crate.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! A builder for basic metadata using the `image` crate. -//! -//! Note that this is effectively a fallback for when there is no other metadata -//! available. - -use std::path::Path; - -use image::GenericImageView as _; - -use crate::{ - error::RavesError, - models::metadata::{ - builder::get_video_len, - types::{Format, MediaKind, Resolution}, - SpecificMetadata, - }, -}; - -use super::MetadataBuilder; - -impl MetadataBuilder { - /// Applies EXIF data from `image` to `self`. - #[tracing::instrument(skip(self))] - pub(super) async fn apply_image( - &mut self, - path: &Path, - format: Format, - ) -> Result<(), RavesError> { - // read the image into a buffer and grab its dimensions - let img = - image::open(path).map_err(|e| RavesError::ImageError(path.display().to_string(), e))?; - let (width, height) = img.dimensions(); - tracing::debug!("got image dimensions from image crate: {width}x{height}"); - - // apply format - let media_kind = format.media_kind(); - self.format = Some(format); - - // resolution - self.resolution = Some(Resolution::new(width, height)); - tracing::debug!("got resolution from image!"); - - // specific - self.specific = Some(match media_kind { - MediaKind::Photo => SpecificMetadata::Image {}, - MediaKind::Video => { - tracing::warn!("video detected, but the image crate doesn't handle videos!"); - get_video_len(path)? 
-            }
-            MediaKind::AnimatedPhoto => unimplemented!(),
-        });
-
-        Ok(())
-    }
-}
diff --git a/src/models/metadata/builder/kamadak.rs b/src/models/metadata/builder/kamadak.rs
deleted file mode 100644
index ab6516f..0000000
--- a/src/models/metadata/builder/kamadak.rs
+++ /dev/null
@@ -1,108 +0,0 @@
-use std::path::Path;
-
-use kamadak_exif::{Exif as KamadakExif, In, Tag};
-use tokio::try_join;
-
-use crate::{
-    error::RavesError,
-    models::metadata::{
-        builder::get_video_len,
-        types::{Format, MediaKind, Resolution},
-        OtherMetadataMap, OtherMetadataValue, SpecificMetadata,
-    },
-};
-
-use super::MetadataBuilder;
-
-impl MetadataBuilder {
-    /// Applies EXIF data from `kamadak_exif` to `self`.
-    #[tracing::instrument(skip(self))]
-    pub(super) async fn apply_kamadak_exif(
-        &mut self,
-        path: &Path,
-        format: Format,
-    ) -> Result<(), RavesError> {
-        let (_, exif) = try_join! {
-            self.file(path),
-            look(path),
-        }?;
-
-        tracing::debug!("got exif data from kamadak-exif!");
-
-        let p = In::PRIMARY;
-
-        let err = |msg: &str| {
-            RavesError::FileMissingMetadata(path.display().to_string(), msg.to_string())
-        };
-
-        tracing::debug!("looking for exif data...");
-
-        // resolution
-        let kamadak_exif::Value::Long(ref w) = exif
-            .get_field(Tag::ImageWidth, p)
-            .ok_or(err("no width"))?
-            .value
-        else {
-            return Err(err("no width"));
-        };
-        let kamadak_exif::Value::Long(ref h) = exif
-            .get_field(Tag::ImageLength, p)
-            .ok_or(err("no height"))?
-            .value
-        else {
-            return Err(err("no height"));
-        };
-
-        self.resolution = Some(Resolution::new(
-            *w.first().ok_or(err("no width"))?,
-            *h.first().ok_or(err("no height"))?,
-        ));
-        tracing::debug!("got resolution from exif!");
-
-        // specific
-        self.specific = Some(match format.media_kind() {
-            MediaKind::Photo => SpecificMetadata::Image {},
-            MediaKind::Video => get_video_len(path)?,
-            MediaKind::AnimatedPhoto => unimplemented!(),
-        });
-        tracing::debug!("got specific metadata from exif!");
-
-        // other
-        let mut mapped = OtherMetadataMap::new();
-        for field in exif.fields() {
-            let key = field.tag.to_string();
-            let value = OtherMetadataValue {
-                user_facing_name: Some(key.clone()),
-                value: field.display_value().to_string(),
-            };
-
-            mapped.0.insert(key, value);
-        }
-        self.other = Some(Some(mapped));
-        tracing::debug!("got other metadata from exif!");
-
-        tracing::debug!("finished looking for exif data!");
-
-        Ok(())
-    }
-}
-
-// to get rid of that god-forsaken `JoinError`
-async fn look(path: &Path) -> Result<KamadakExif, RavesError> {
-    let path = path.to_path_buf();
-    let path_str = path.display().to_string();
-
-    tokio::task::spawn_blocking(|| -> Result<KamadakExif, RavesError> {
-        let mut file = std::fs::File::open(path).map_err(|_e| RavesError::MediaDoesntExist {
-            path: path_str.clone(),
-        })?;
-
-        let mut buf_reader = std::io::BufReader::new(&mut file);
-        let exif_reader = kamadak_exif::Reader::new();
-        exif_reader
-            .read_from_container(&mut buf_reader)
-            .map_err(|e| RavesError::KamadakExifError(path_str, e))
-    })
-    .await
-    .map_err(RavesError::TokioJoinError)?
-}
diff --git a/src/models/metadata/builder/mod.rs b/src/models/metadata/builder/mod.rs
deleted file mode 100644
index 74ea07a..0000000
--- a/src/models/metadata/builder/mod.rs
+++ /dev/null
@@ -1,265 +0,0 @@
-//! # Metadata Builders
-//!
-//! Assists in ascertaining metadata of different kinds in one format.
-//!
-//! Note that this should eventually be replaced with a fleshed-out library
-//! with full support for all these types!
-
-pub mod image_crate;
-pub mod kamadak;
-pub mod matroska;
-pub mod mp4parse;
-pub mod nom;
-
-use std::{
-    os::unix::fs::MetadataExt,
-    path::{Path, PathBuf},
-    time::SystemTime,
-};
-
-use infer::{video::*, Type};
-use tokio::io::AsyncReadExt;
-
-use crate::{
-    error::RavesError,
-    models::metadata::{
-        types::{Filesize, Format, MediaKind, Resolution},
-        Metadata, OtherMetadataMap, SpecificMetadata,
-    },
-};
-
-/// A media file's metadata. Common metadata is always present, while the `other`
-/// field represents that which isn't standard in a dictionary (string, string)
-/// form.
-#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
-pub struct MetadataBuilder {
-    // # file
-    pub path: Option<PathBuf>,
-    pub filesize: Option<Filesize>,
-    pub creation_date: Option<Option<SystemTime>>,
-    pub modified_date: Option<Option<SystemTime>>,
-
-    // # format
-    pub format: Option<Format>,
-
-    // # exif
-    pub resolution: Option<Resolution>,
-    pub specific: Option<SpecificMetadata>,
-    pub other: Option<Option<OtherMetadataMap>>,
-
-    // # raves-specific
-    pub first_seen_date: SystemTime,
-}
-
-impl MetadataBuilder {
-    /// Returns metadata found from a file.
-    #[tracing::instrument]
-    pub async fn apply<P>(mut self, path: P) -> Result<Metadata, RavesError>
-    where
-        P: AsRef<Path> + std::fmt::Debug,
-    {
-        let path = path.as_ref();
-
-        // get the format
-        tracing::debug!("grabbing format...");
-        let (format, _inferred) = Self::format(path).await?;
-
-        // apply format
-        tracing::debug!("applying format...");
-        let media_kind = format.media_kind();
-        self.format = Some(format.clone());
-
-        tracing::debug!("applying metadata...");
-        match media_kind {
-            MediaKind::Photo => {
-                // kamadak-exif has a lot of photo formats
-                let kamadak = self.apply_kamadak_exif(path, format.clone()).await;
-                if kamadak.is_ok() {
-                    return self.build().await;
-                }
-
-                // fallback to image crate
-                tracing::warn!("couldn't get metadata from kamadak-exif. using image crate...");
-                self.apply_image(path, format).await?;
-            }
-            MediaKind::Video => {
-                // nom_exif supports mp4 and mov.
-                // TODO: other crates for more formats?
-                let nom = self.apply_nom_exif(path, format.clone()).await;
-                if nom.is_ok() {
-                    return self.build().await;
-                }
-
-                tracing::warn!("couldn't get metadata from nom_exif. using video fallbacks...");
-
-                // let's read the first 38 bytes of the file.
-                // that lets us check the actual container type
-                let mut buf = [0; 38];
-                tokio::fs::File::open(path)
-                    .await
-                    .map_err(|e| RavesError::FileMetadataFailure {
-                        path: path.display().to_string(),
-                        err: e,
-                    })?
-                    .read_exact(&mut buf)
-                    .await
-                    .map_err(|e| RavesError::FileMetadataFailure {
-                        path: path.display().to_string(),
-                        err: e,
-                    })?;
-
-                // use generic crates for exif-less containers
-                if is_mp4(&buf) {
-                    tracing::warn!("detected mp4 container. using mp4parse...");
-                    self.apply_mp4parse(path, format).await?;
-                } else if is_mov(&buf) || is_mkv(&buf) || is_webm(&buf) {
-                    tracing::warn!("detected matroska container. using matroska crate...");
-                    self.apply_matroska(path, format).await?;
-                } else {
-                    tracing::error!(
-                        "an unsupported video container was detected. trying ffmpeg..."
-                    );
-                    unimplemented!()
-                }
-            }
-            MediaKind::AnimatedPhoto => unimplemented!(),
-        };
-
-        tracing::debug!("finished applying metadata!");
-        self.build().await
-    }
-}
-
-// private methods
-impl MetadataBuilder {
-    /// Adds typical file attributes to `self`.
-    #[tracing::instrument(skip(self))]
-    async fn file(&mut self, path: &Path) -> Result<(), RavesError> {
-        let path_str = path.display().to_string();
-
-        // err if the file doesn't open
-        let metadata = tokio::fs::metadata(path)
-            .await
-            .map_err(|_e| RavesError::MediaDoesntExist { path: path_str })?;
-        tracing::debug!("got file metadata!");
-
-        self.path = Some(path.to_path_buf());
-        self.filesize = Some(Filesize(metadata.size()));
-        self.creation_date = Some(metadata.created().ok());
-        self.modified_date = Some(metadata.modified().ok());
-        tracing::debug!("added file metadata to builder!");
-
-        Ok(())
-    }
-
-    /// Grabs the format of the media file at `path`.
-    #[tracing::instrument]
-    async fn format(path: &Path) -> Result<(Format, Type), RavesError> {
-        let path_str = path.display().to_string();
-
-        tracing::debug!("grabbing format...");
-        let mime = infer::get_from_path(path)
-            .map_err(|_e| RavesError::MediaDoesntExist {
-                path: path_str.clone(),
-            })?
-            .ok_or(RavesError::NoMimeData {
-                path: path_str.clone(),
-            })?;
-
-        // aaaand make the format
-        tracing::debug!("creating mime type for media file...");
-
-        Ok((
-            Format::new_from_mime(mime.mime_type()).unwrap(),
-            // Format::new_from_mime(mime.mime_type()).ok_or(RavesError::FileNotSupportedMedia {
-            //     path: path_str.clone(),
-            // })?,
-            mime,
-        ))
-    }
-
-    /// Builds the metadata from the data gathered.
-    ///
-    /// This will return a None if no file metadata could be gathered.
-    #[tracing::instrument(skip(self))]
-    async fn build(self) -> Result<Metadata, RavesError> {
-        let path_str = self
-            .path
-            .as_ref()
-            .map(|p| p.display().to_string())
-            .unwrap_or("no path given".into());
-
-        Ok(Metadata {
-            path: self.path.ok_or(RavesError::FileMissingMetadata(
-                path_str.clone(),
-                "no path given".into(),
-            ))?,
-            filesize: self.filesize.ok_or(RavesError::FileMissingMetadata(
-                path_str.clone(),
-                "no file size given".into(),
-            ))?,
-            creation_date: self.creation_date.flatten(),
-            modified_date: self.modified_date.flatten(),
-
-            format: self.format.ok_or(RavesError::FileMissingMetadata(
-                path_str.clone(),
-                "no format given".into(),
-            ))?,
-            resolution: self.resolution.ok_or(RavesError::FileMissingMetadata(
-                path_str.clone(),
-                "no resolution given".into(),
-            ))?,
-
-            specific: self.specific.ok_or(RavesError::FileMissingMetadata(
-                path_str.clone(),
-                "no specific metadata given".into(),
-            ))?,
-            other: self.other.flatten(),
-
-            // FIXME: HEYYYYY! THIS IS WRONG: MUST CHECK DATABASE!!!
-            first_seen_date: self.first_seen_date,
-        })
-    }
-}
-
-impl Default for MetadataBuilder {
-    fn default() -> Self {
-        Self {
-            path: None,
-            resolution: None,
-            filesize: None,
-            format: None,
-            creation_date: None,
-            modified_date: None,
-            first_seen_date: SystemTime::now(),
-            specific: None,
-            other: None,
-        }
-    }
-}
-
-/// Grabs the video length of a media file using FFmpeg.
-pub fn get_video_len(path: &Path) -> Result<SpecificMetadata, RavesError> {
-    let path_str = path.display().to_string();
-
-    // let's ask ffmpeg what it thinks
-    tracing::trace!("video detected. asking ffmpeg to handle...");
-    ffmpeg_next::init()?;
-    let t = ffmpeg_next::format::input(path).map_err(|e| RavesError::FfmpegFailedProcessing {
-        path: path_str.clone(),
-        err: e.to_string(),
-    })?;
-
-    // grab the first video stream and see how long it is
-    let video_length = t
-        .streams()
-        .find(|s| s.parameters().medium() == ffmpeg_next::media::Type::Video)
-        .map(|s| (ffmpeg_next::Rational::new(s.duration() as i32, 1)) * s.time_base())
-        .map(|s| s.0 as f64 / s.1 as f64)
-        .unwrap_or(0_f64);
-    tracing::trace!("video len is {video_length}.");
-
-    Ok(SpecificMetadata::Video {
-        length: video_length,
-    })
-}
diff --git a/src/models/metadata/mod.rs b/src/models/metadata/mod.rs
deleted file mode 100644
index 4690a96..0000000
--- a/src/models/metadata/mod.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-pub mod builder;
-pub mod types;
-
-use std::{collections::HashMap, path::PathBuf, time::SystemTime};
-
-use types::{Filesize, Format, Framerate, Resolution};
-
-/// A media file's metadata. Common metadata is always present, while the `other`
-/// field represents that which isn't standard in a dictionary (string, string)
-/// form.
-#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
-pub struct Metadata {
-    pub path: PathBuf,
-    pub filesize: Filesize,
-    pub creation_date: Option<SystemTime>,
-    pub modified_date: Option<SystemTime>,
-
-    /// The MIME type for the media file.
-    pub format: Format,
-
-    pub resolution: Resolution,
-    /// Any kind-specific metadata (e.g. video framerate, etc.)
-    pub specific: SpecificMetadata,
-    /// Metadata that isn't immensely common, but can be read as a string.
-    pub other: Option<OtherMetadataMap>,
-
-    /// When Raves first saw this file.
-    pub first_seen_date: SystemTime,
-}
-
-/// Metadata "specific" to one type of media.
-#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
-pub enum SpecificMetadata {
-    #[non_exhaustive]
-    Image {},
-
-    #[non_exhaustive]
-    AnimatedImage {
-        frame_count: u32,
-        framerate: Framerate,
-    },
-
-    #[non_exhaustive]
-    Video { length: f64 },
-}
-
-#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
-pub struct OtherMetadataValue {
-    pub user_facing_name: Option<String>,
-    pub value: String,
-}
-
-/// A representation for uncommon metadata that can only be read.
-///
-/// Also, it's a `HashMap` newtype to get around the lack of `PartialOrd`.
-#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
-pub struct OtherMetadataMap(pub HashMap<String, OtherMetadataValue>);
-
-impl OtherMetadataMap {
-    pub fn new() -> Self {
-        Self(HashMap::new())
-    }
-}
-
-impl Default for OtherMetadataMap {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl PartialOrd for OtherMetadataMap {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        self.0.len().partial_cmp(&other.0.len())
-    }
-}
diff --git a/src/models/mod.rs b/src/models/mod.rs
index 3b4b123..e53dbe2 100644
--- a/src/models/mod.rs
+++ b/src/models/mod.rs
@@ -1,4 +1,3 @@
pub mod media;
-pub mod metadata;
pub mod tags;
pub mod thumbnail;
diff --git a/src/models/tags.rs b/src/models/tags.rs
index 33ef07c..cd208bf 100644
--- a/src/models/tags.rs
+++ b/src/models/tags.rs
@@ -1,6 +1,6 @@
//! Represents tags in all their glory.

-use surrealdb::sql::Thing;
+use uuid::Uuid;

pub type TagIdent = String;

@@ -47,11 +47,11 @@ pub struct Tag {
#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Deserialize)]
pub struct TagRecord {
    pub tag: Tag,
-    pub id: Thing,
+    pub id: Uuid,
}

#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Deserialize)]
pub struct TagSectionRecord {
    pub section: TagSection,
-    pub id: Thing,
+    pub id: Uuid,
}
diff --git a/src/models/thumbnail.rs b/src/models/thumbnail.rs
index 1ab08e9..7864f76 100644
--- a/src/models/thumbnail.rs
+++ b/src/models/thumbnail.rs
@@ -3,6 +3,7 @@
//! This includes generating thumbnails for media (and caching them), alongside
//! grabbing thumbnails from media created by a camera or device.

+use camino::Utf8PathBuf;
use ffmpeg_next::{
    codec::context::Context,
    filter::{self, Graph},
@@ -10,44 +11,46 @@ use ffmpeg_next::{
    frame,
};
use image::imageops::FilterType;
-use std::{io::BufWriter, path::PathBuf};
-use surrealdb::RecordId;
+use std::io::BufWriter;
+use uuid::Uuid;

use ffmpeg_next::util::frame::video::Video;

use crate::{
    config::Config,
-    database::RavesDb,
-    error::{DatabaseError, RavesError, ThumbnailError},
-    models::{media::Media, metadata::SpecificMetadata},
+    database::DATABASE,
+    error::{RavesError, ThumbnailError},
+    models::media::{metadata::SpecificMetadata, Media},
};

-#[derive(Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize)]
+#[derive(
+    Clone, Debug, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize, sqlx::FromRow,
+)]
pub struct Thumbnail {
    /// a UNIQUE path to the thumbnail file.
-    path: PathBuf,
+    path: String,

    /// the id of the original media file in the database.
-    image_id: RecordId,
+    image_id: Uuid,
}

impl Thumbnail {
    const SIZE: u32 = 512;

    /// Creates a new thumbnail representation given an image ID.
- pub async fn new(image_id: &RecordId) -> Self { + pub async fn new(image_id: &Uuid) -> Self { // note: the path to a thumbnail is static from its id. let path = Self::make_path(image_id).await; Self { - path, - image_id: image_id.clone(), + path: path.to_string(), + image_id: *image_id, } } /// Makes a real thumbnail file for this representation. It'll be saved to disk. pub async fn create(&self) -> Result<(), RavesError> { // avoid recreating thumbnails - if self.path.exists() { + if self.path().exists() { tracing::trace!("attempted to create thumbnail, but it already exists"); return Ok(()); } @@ -58,13 +61,13 @@ impl Thumbnail { let media = media_ext.clone(); // ok we have the media. let's use it - let thumbnail_buffer = match media.specific_type() { + let thumbnail_buffer = match media.specific_metadata.0.clone() { SpecificMetadata::Image { .. } => { // let's read it into a buffer - tokio::fs::read(media.path()) + tokio::fs::read(&media.path) .await .map_err(|_e| RavesError::MediaDoesntExist { - path: media.path_str(), + path: media.path.clone(), })? } @@ -75,11 +78,11 @@ impl Thumbnail { ffmpeg_next::init()?; // let's start out by finding that change - let mut input = input(&media.path())?; + let mut input = input(&media.path)?; let input_stream = input .streams() .best(ffmpeg_next::media::Type::Video) - .ok_or(RavesError::FfmpegNoGoodVideoStreams(media.path_str()))?; + .ok_or(RavesError::FfmpegNoGoodVideoStreams(media.path.clone()))?; let codec = Context::from_parameters(input_stream.parameters().to_owned())?; let mut decoder = codec.decoder().video()?; let input_stream_index = input_stream.index(); @@ -144,7 +147,7 @@ impl Thumbnail { // we should have a scene frame now. let's modify and save! Ok(scene_frame - .ok_or(ThumbnailError::FfmpegNoSelectedFilter(media.path_str()))? + .ok_or(ThumbnailError::FfmpegNoSelectedFilter(media.path.clone()))? .data(0) .to_vec()) }) @@ -160,40 +163,40 @@ impl Thumbnail { // all done! let's brag tracing::trace!( "successfully generated thumbnail for media file at `{}`!", - media_ext.path_str() + media_ext.path.clone() ); todo!() } /// Grabs the path to the thumbnail. - pub fn path(&self) -> &PathBuf { - &self.path + pub fn path(&self) -> Utf8PathBuf { + Utf8PathBuf::from(self.path.clone()) } /// Represents this thumbnail's path as a string. pub fn path_str(&self) -> String { - self.path().display().to_string() + self.path().to_string() } /// Grabs the ID of the original media file. - pub fn image_id(&self) -> &RecordId { + pub fn image_id(&self) -> &Uuid { &self.image_id } pub async fn save_from_buffer(&self, buf: &[u8], media: &Media) -> Result<(), RavesError> { let thumbnail = { let img = image::load_from_memory(buf) - .map_err(|e| ThumbnailError::ImageParsingFailed(e, media.path_str()))?; + .map_err(|e| ThumbnailError::ImageParsingFailed(e, media.path.clone()))?; img.resize_to_fill(Self::SIZE, Self::SIZE, FilterType::Nearest) }; let file = std::fs::File::create(self.path()) - .map_err(|e| ThumbnailError::ThumbnailSaveFailure(e, self.path_str()))?; + .map_err(|e| ThumbnailError::ThumbnailSaveFailure(e, self.path.clone()))?; let mut writer = BufWriter::new(file); - let path_str = self.path_str(); + let path_str = self.path.clone(); // let's save it as blessed avif tokio::task::spawn_blocking(move || -> Result<(), ThumbnailError> { @@ -209,8 +212,8 @@ impl Thumbnail { impl Thumbnail { /// Makes a unique thumbnail path from an image's unique ID. 
-    async fn make_path(image_id: &RecordId) -> PathBuf {
-        let filename = PathBuf::from(format!("{}.thumbnail", image_id.key()));
+    async fn make_path(image_id: &Uuid) -> Utf8PathBuf {
+        let filename = Utf8PathBuf::from(format!("{image_id}.thumbnail"));
        Config::read()
            .await
            .cache_dir
@@ -221,13 +224,43 @@

    /// Returns the media file representation that this thumbnail is for.
    async fn get_media(&self) -> Result<Media, RavesError> {
-        let entry: Option<Media> = RavesDb::connect()
-            .await?
-            .media_info
-            .select(self.image_id.clone())
-            .await
-            .map_err(DatabaseError::QueryFailed)?;
+        let mut conn = DATABASE.acquire().await?;
+
+        let media = sqlx::query_as::<_, Media>("SELECT * FROM info WHERE id = $1")
+            .bind(self.image_id)
+            .fetch_one(&mut *conn)
+            .await?;
+
+        Ok(media)
+    }
+}
+
+impl Media {
+    /// Returns the thumbnail from the database for this media file.
+    #[tracing::instrument]
+    pub async fn get_thumbnail(&self, _id: &Uuid) -> Result<Thumbnail, RavesError> {
+        // see if we have a thumbnail in the database
+        if let Some(thumbnail) = self.database_get_thumbnail().await? {
+            return Ok(thumbnail);
+        }
+
+        // we don't have one cached yet! let's make one ;D
+        let thumbnail = Thumbnail::new(&self.id).await;
+        thumbnail.create().await?; // this makes the file
+        Ok(thumbnail)
+    }
+
+    /// Tries to grab the thumbnail from the database, if it's there.
+    #[tracing::instrument]
+    async fn database_get_thumbnail(&self) -> Result<Option<Thumbnail>, RavesError> {
+        let mut conn = DATABASE.acquire().await?;
+
+        let thumbnail =
+            sqlx::query_as::<_, Thumbnail>("SELECT * FROM thumbnail WHERE image_id = $1")
+                .bind(self.id.to_string())
+                .fetch_optional(&mut *conn)
+                .await?;

-        entry.ok_or(ThumbnailError::MediaNotFound(self.image_id().to_string()).into())
+        Ok(thumbnail)
    }
}
diff --git a/src/search/details.rs b/src/search/details.rs
index 94fd550..fb7f5e0 100644
--- a/src/search/details.rs
+++ b/src/search/details.rs
@@ -4,7 +4,7 @@

use std::path::PathBuf;

-use crate::models::metadata::types::Framerate;
+use crate::models::media::metadata::Framerate;

use jiff::Zoned;
diff --git a/src/search/modifiers.rs b/src/search/modifiers.rs
index 3436f7e..fcfc799 100644
--- a/src/search/modifiers.rs
+++ b/src/search/modifiers.rs
@@ -1,5 +1,3 @@
-use surrealdb::sql::Value;
-
use super::details::{DateDetail, FormatDetail, KindDetail, TagDetail};

#[derive(Clone, Debug, PartialEq, PartialOrd)]
@@ -53,7 +51,7 @@ pub enum Expr {

pub struct PreExecutionQuery {
    pub query: String,
-    pub parameters: Vec<Value>,
+    // pub parameters: Vec<Value>, // FIXME: no clue what i was cookin here. `surrealql::Value`..?
}

// /// A modifier must become a query to be used.
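The commented-out `parameters` field leaves open how bound values should travel once SurrealDB's `Value` is gone; one possible sqlx-flavored shape (purely a sketch, not this changeset's decision) could look like:

```rust
use sqlx::{query::Query, sqlite::SqliteArguments, Sqlite};

// Hypothetical: keep the SQL and its bind values together, then hand
// them to sqlx right before execution.
pub struct BoundQuery {
    pub query: String,
    pub parameters: Vec<String>,
}

impl BoundQuery {
    pub fn to_sqlx(&self) -> Query<'_, Sqlite, SqliteArguments<'_>> {
        let mut q = sqlx::query(&self.query);
        for p in &self.parameters {
            // matches the `$1`, `$2`, ... placeholders in `self.query`
            q = q.bind(p.as_str());
        }
        q
    }
}
```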
diff --git a/src/search/sort.rs b/src/search/sort.rs
index 5cdebb8..eadf724 100644
--- a/src/search/sort.rs
+++ b/src/search/sort.rs
@@ -2,8 +2,7 @@

use core::mem;

-use crate::models::media::Media;
-use crate::models::metadata::SpecificMetadata;
+use crate::models::media::{metadata::SpecificMetadata, Media};

pub struct PreparedQuery {
    pub initial_select: String, // something like "SELECT * FROM info"
@@ -55,20 +54,16 @@ impl FinishedQuery {

        match ty {
            SortType::Random => v.shuffle(&mut thread_rng()),
-            SortType::DateFirstSeen => {
-                v.sort_by(|a, b| a.metadata.first_seen_date.cmp(&b.metadata.first_seen_date))
-            }
+            SortType::DateFirstSeen => v.sort_by(|a, b| a.first_seen_date.cmp(&b.first_seen_date)),
            SortType::DateModified => {
-                v.sort_by(|a, b| a.metadata.modified_date.cmp(&b.metadata.modified_date))
-            }
-            SortType::DateCreated => {
-                v.sort_by(|a, b| a.metadata.creation_date.cmp(&b.metadata.creation_date))
+                v.sort_by(|a, b| a.modification_date.cmp(&b.modification_date))
            }
+            SortType::DateCreated => v.sort_by(|a, b| a.creation_date.cmp(&b.creation_date)),
            SortType::TagCount => v.sort_by(|a, b| a.tags.len().cmp(&b.tags.len())),
-            SortType::Type => v.sort_by(|a, b| a.metadata.format.cmp(&b.metadata.format)),
-            SortType::Size => v.sort_by(|a, b| a.metadata.filesize.cmp(&b.metadata.filesize)),
+            SortType::Type => v.sort_by(|a, b| a.format.cmp(&b.format)),
+            SortType::Size => v.sort_by(|a, b| a.filesize.cmp(&b.filesize)),
            SortType::Resolution => {
-                v.sort_by(|a, b| a.metadata.resolution.cmp(&b.metadata.resolution))
+                v.sort_by(|a, b| (a.width_px + a.height_px).cmp(&(b.width_px + b.height_px)))
            }

            // this one is different b/c it relies on a sort specific to videos.
@@ -85,7 +80,7 @@ impl FinishedQuery {

            // split the vec into photos and videos
            for media in vec.into_iter() {
-                match media.metadata.specific {
+                match media.specific_metadata.0 {
                    SpecificMetadata::Image {} => photos.push(media),
                    SpecificMetadata::Video { length } => videos.push((media, length)),
                    _ => unreachable!("animated images aren't yet distinct from photos"),
@@ -96,7 +91,7 @@ impl FinishedQuery {
            videos.sort_by(|(_, a_len), (_, b_len)| a_len.total_cmp(b_len));

            // always sort photos by the creation date (this sucks but whatever)
-            photos.sort_by(|a, b| a.metadata.creation_date.cmp(&b.metadata.creation_date));
+            photos.sort_by(|a, b| a.creation_date.cmp(&b.creation_date));

            #[cfg(debug_assertions)]
            assert!(v.is_empty(), "the original vec should still be empty here");
@@ -131,26 +126,22 @@ impl FinishedQuery {

#[cfg(test)]
mod tests {
-    use std::{path::PathBuf, time::SystemTime};
+    use chrono::Utc;
+    use sqlx::types::Json;
+    use uuid::Uuid;

-    use crate::models::metadata::{
-        types::{Filesize, Format, Resolution},
-        Metadata,
-    };
+    use crate::models::media::metadata::Format;

    use super::*;

    #[tokio::test]
    async fn sort_by_size() {
        let mut v: Vec<Media> = Vec::new();

-        for i in 0..10 {
-            v.push(Media {
-                metadata: {
-                    let mut m = create_default_metadata();
-                    m.filesize = Filesize(i as u64 * 1024);
-                    m
-                },
-                tags: vec![],
+        for len in 0..=10 {
+            v.push({
+                let mut m = create_default_media();
+                m.filesize = len as i64 * 1024;
+                m
            });
        }

@@ -169,24 +160,18 @@ mod tests {
    async fn sort_by_duration() {
        let mut v: Vec<Media> = Vec::new();

-        v.push(Media {
-            metadata: {
-                let mut m = create_default_metadata();
-                m.filesize = Filesize(2_000_000);
-                m
-            },
-            tags: vec![],
+        v.push({
+            let mut m = create_default_media();
+            m.filesize = 2_000_000;
+            m
        });

        for len in 1..=10 {
-            v.push(Media {
-                metadata: {
-                    let mut m = create_default_metadata();
-                    m.specific = SpecificMetadata::Video { length: len as f64 };
-                    m.filesize = Filesize(len as u64 * 1024);
-                    m
-                },
-                tags: vec![],
+            v.push({
+                let mut m = create_default_media();
+                *m.specific_metadata = SpecificMetadata::Video { length: len as f64 };
+                m.filesize = len as i64 * 1024;
+                m
            });
        }

@@ -199,7 +184,7 @@ mod tests {

        impl F for Media {
            fn get_length(&self) -> f64 {
-                if let SpecificMetadata::Video { length } = self.metadata.specific.clone() {
+                if let SpecificMetadata::Video { length } = self.specific_metadata.clone().0 {
                    length
                } else {
                    0_f64
@@ -223,17 +208,20 @@ mod tests {
        );
    }

-    fn create_default_metadata() -> Metadata {
-        Metadata {
-            path: PathBuf::from("a"),
-            resolution: Resolution::new(1920, 1080),
-            filesize: Filesize(1024),
-            format: Format::new_from_mime("image/jpeg").unwrap(),
+    fn create_default_media() -> Media {
+        Media {
+            id: Uuid::nil(),
+            path: "a".into(),
+            filesize: 1024,
+            format: Json(Format::new_from_mime("image/jpeg").unwrap()),
            creation_date: None,
-            modified_date: None,
-            first_seen_date: SystemTime::now(),
-            specific: SpecificMetadata::Image {},
-            other: None,
+            modification_date: None,
+            first_seen_date: Utc::now(),
+            width_px: 1920,
+            height_px: 1080,
+            specific_metadata: Json(SpecificMetadata::Image {}),
+            other_metadata: None,
+            tags: Json(vec![]),
        }
    }
}
diff --git a/src/types/animated_image.rs b/src/types/animated_image.rs
deleted file mode 100644
index e69de29..0000000
diff --git a/src/types/image.rs b/src/types/image.rs
deleted file mode 100644
index c833b04..0000000
--- a/src/types/image.rs
+++ /dev/null
@@ -1,25 +0,0 @@
-use serde::{Deserialize, Serialize};
-
-use super::{
-    metadata::{FileName, FileSize, Metadata, Resolution},
-    tag::TagIdent,
-    Media,
-};
-
-/// General forms of metadata found on an image. These are the searchable
-/// kinds.
-///
-/// You may wish to see `EtcMetadata` for less common fields.
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub struct ImageMetadata {
-    resolution: Resolution,
-    file_size: FileSize,
-    file_name: FileName,
-    tags: Vec<TagIdent>,
-}
-
-pub type Image = Media<ImageMetadata>;
-
-impl Image {}
-
-impl Metadata for ImageMetadata {}
diff --git a/src/types/metadata.rs b/src/types/metadata.rs
deleted file mode 100644
index 2bdbda9..0000000
--- a/src/types/metadata.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-use serde::{Deserialize, Serialize};
-
-pub trait Metadata {}
-
-pub type FileSize = u64;
-pub type FileName = String;
-pub type CreatedDate = u64;
-pub type ModifiedDate = u64;
-pub type FirstSeenDate = u64;
-
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub struct Resolution {
-    pub x: u32,
-    pub y: u32,
-}
diff --git a/src/types/mod.rs b/src/types/mod.rs
deleted file mode 100644
index 64ea8f2..0000000
--- a/src/types/mod.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-//! Types that are really the bedrock of the app.
-
-use metadata::Metadata;
-
-pub mod animated_image;
-pub mod image;
-pub mod metadata;
-pub mod tag;
-pub mod video;
-
-/// Some kind of media file (image, animated image, video, etc.)
-pub struct Media<Meta: Metadata> {
-    metadata: Meta,
-}
diff --git a/src/types/tag.rs b/src/types/tag.rs
deleted file mode 100644
index 116ebd8..0000000
--- a/src/types/tag.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-//! Represents tags in all their glory.
-
-use sea_orm::prelude::Uuid;
-use serde::{Deserialize, Serialize};
-
-pub type TagIdent = Uuid;
-
-/// A "section" for tags. When a tag has a section, it is separated from others
-/// by extreme differences.
-///
-/// For example, it might make absolutely zero sense to sort a vacation and
-/// anime artwork using the same tags.
-///
-/// Instead, separate them with sections.
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub struct TagSection {
-    identifier: String,
-}
-
-impl Default for TagSection {
-    /// Creates THE default `TagSection`, simply titled "default".
-    fn default() -> Self {
-        Self {
-            identifier: String::from("default"),
-        }
-    }
-}
-
-impl TagSection {}
-
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub struct Tag {
-    /// A unique name describing this tag. Don't use this to find the tag - EVER.
-    /// The name can change, but a tag's UUID is forever static.
-    name: String,
-    /// A unique identifier. Always use this when referencing the tag externally.
-    uuid: TagIdent,
-    /// The section this tag belongs to.
-    tag_section: Option<TagSection>,
-    /// The other tags this tag "implies". For example, tags "christmas" and
-    /// "halloween" would both imply the "holiday" tag.
-    implies: Vec<TagIdent>,
-}
diff --git a/src/types/video.rs b/src/types/video.rs
deleted file mode 100644
index e69de29..0000000
diff --git a/src/watch.rs b/src/watch.rs
new file mode 100644
index 0000000..45cb498
--- /dev/null
+++ b/src/watch.rs
@@ -0,0 +1,150 @@
+//! Watches for changes inside of given folders.
+//!
+//! When a new file appears, it'll be added into the database! If an existing
+//! file changes, it'll have its metadata reviewed and, if necessary, changed
+//! in the database.
+
+use std::{path::Path, time::Duration};
+
+use async_watcher::{notify::RecursiveMode, AsyncDebouncer, DebouncedEventKind};
+use camino::Utf8Path;
+
+use futures::stream::StreamExt;
+
+use crate::{config::Config, models::media::Media};
+
+/// A 'daemon' that watches for file changes and keeps the database in sync.
+pub struct Watch;
+
+impl Watch {
+    /// Begins watching for file changes. When it detects one, it'll update
+    /// metadata when necessary.
+    ///
+    /// NOTE: You should use this with `tokio::spawn`.
+    #[tracing::instrument(skip_all)]
+    pub async fn watch() {
+        tracing::info!("starting watcher...");
+        let (mut debouncer, mut file_events) =
+            AsyncDebouncer::new_with_channel(Duration::from_millis(2000), None)
+                .await
+                .expect("watcher should be configured correctly");
+
+        let paths = Config::read().await.watched_paths.clone();
+        tracing::debug!("got the following paths: {paths:?}");
+
+        let watcher = debouncer.watcher();
+        for path in &paths {
+            let res = watcher
+                .watch(path.as_std_path(), RecursiveMode::Recursive)
+                .inspect_err(|e| {
+                    tracing::warn!("Failed to start watching folder! err: {e}, path: `{path}`")
+                });
+
+            if res.is_ok() {
+                tracing::info!("The file watcher is now watching path: `{path}`");
+            }
+        }
+
+        // start off by checking metadata for all watched files
+        {
+            tracing::info!("The watcher is now online! Performing initial scan on all files...");
+
+            let stream = tokio_stream::iter(paths.into_iter())
+                .map(|p| async move {
+                    Self::handle_dir(p.as_std_path().to_path_buf()).await;
+                })
+                .buffered(5);
+
+            tokio_stream::StreamExt::chunks_timeout(
+                stream,
+                3,
+                Duration::from_millis(1000 * 60 * 10),
+            )
+            .collect::<Vec<_>>()
+            .await;
+
+            tracing::info!("Initial scan complete!");
+        }
+
+        // TODO: keep up with config changes. that'll require `select!` and
+        // some other task w/ `mpsc`
+        //
+        // TODO(2025-01-16): hey i bet we can just restart the watcher instead... lol
+
+        // when anything changes, we must scan its ENTIRE directory.
+        // see: https://github.com/notify-rs/notify/issues/412
+        while let Some(res_ev) = file_events.recv().await {
+            tracing::debug!("File event received! ev: {res_ev:?}");
+
+            // if it's an error, complain and move on...
+            let Ok(events) = res_ev else {
+                tracing::warn!("File watcher failed to handle event: {res_ev:?}");
+                continue;
+            };
+
+            // spawn tasks to asynchronously handle the events
+            for event in events {
+                tracing::debug!("Handling event... ev: {event:?}");
+
+                if event.kind == DebouncedEventKind::Any {
+                    // files will have their metadata updated.
+                    //
+                    // folders will be further split into subtasks for each
+                    // contained file to be updated.
+                    if event.path.is_dir() {
+                        tokio::spawn(Self::handle_dir(event.path.clone()));
+                    } else {
+                        tokio::spawn(Self::handle_file(event.path.clone()));
+                    }
+                }
+            }
+        }
+
+        tracing::debug!("Watcher has died! New file changes will not be detected.");
+    }
+
+    #[tracing::instrument(skip_all)]
+    async fn handle_file(path: impl AsRef<Path>) {
+        let path = path.as_ref();
+        let path_str = path.to_string_lossy();
+        tracing::debug!("Working on file at `{path_str}`...");
+
+        // give up if we don't have a utf-8 path.
+        //
+        // this shouldn't occur on Linux/Android, so we're chillin
+        let Some(utf8_path) = Utf8Path::from_path(path) else {
+            tracing::warn!("Failed to process file, as its path wasn't UTF-8. path: `{path_str}`");
+            return;
+        };
+
+        // actually perform the update
+        let _media = Media::load(utf8_path).await.inspect_err(|e| {
+            tracing::error!("Failed to update metadata for file at `{path_str}`. See error: `{e}`")
+        });
+
+        tracing::debug!("Completed file at `{path_str}`!");
+    }
+
+    #[tracing::instrument(skip_all)]
+    async fn handle_dir(path: impl AsRef<Path>) {
+        let path = path.as_ref();
+        let path_str = path.to_string_lossy();
+        tracing::debug!("Handling directory at `{path_str}`...");
+
+        // we'll need to 'walk' the folder to flatten out its contents!
+        let mut walk_dir = async_walkdir::WalkDir::new(path);
+
+        while let Some(res_entry) = walk_dir.next().await {
+            // when we hit an error, report it and move on...
+            let Ok(entry) = res_entry else {
+                tracing::warn!("Walking directory failed. See: {res_entry:?}");
+                continue;
+            };
+
+            // spawn a new task for each file! just to keep things quick
+            tokio::spawn(Self::handle_file(entry.path()));
+        }
+
+        tracing::debug!("Completed directory at `{path_str}`!");
+    }
+}
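As the NOTE above suggests, the watcher is meant to be spawned; a minimal sketch of wiring it into an app (hypothetical, and assuming the library's config and database statics were initialized beforehand):

```rust
use backdrop::watch::Watch;

#[tokio::main]
async fn main() {
    // ... config + database initialization elided ...

    // the watcher loops until its event channel closes, so run it as
    // a background task
    let watcher = tokio::spawn(Watch::watch());

    // ... rest of the application ...
    let _ = watcher.await;
}
```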
diff --git a/src/watch/mod.rs b/src/watch/mod.rs
deleted file mode 100644
index 5546d61..0000000
--- a/src/watch/mod.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-//! Watches for changes inside of given folders.
-//!
-//! When a new file appears, it'll be added into the database! If an existing
-//! file changes, it'll have its metadata reviewed and, if necessary, changed
-//! in the database.
-
-use std::{path::Path, time::Duration};
-
-use async_watcher::{AsyncDebouncer, DebouncedEventKind};
-use tokio::sync::RwLock;
-
-use futures::stream::StreamExt;
-
-use crate::{config::Config, models::media::Media};
-
-pub struct Watch;
-
-impl Watch {
-    #[tracing::instrument(skip_all)]
-    pub async fn watch(conf: RwLock<Config>) {
-        tracing::info!("starting watcher...");
-        let (mut debouncer, mut file_events) =
-            AsyncDebouncer::new_with_channel(Duration::from_millis(100), None)
-                .await
-                .expect("watcher should be configured correctly");
-
-        let paths = conf.read().await.watched_paths.clone();
-        tracing::debug!("got the following paths: {paths:?}");
-
-        let watcher = debouncer.watcher();
-        _ = paths
-            .iter()
-            .map(|p| watcher.watch(p, async_watcher::notify::RecursiveMode::Recursive));
-
-        // start off by checking metadata for all watched files
-        tracing::info!("the watcher is now online. performing initial scan on all files...");
-
-        let stream = tokio_stream::iter(paths.iter())
-            .map(|p| async move {
-                Self::handle_dir(p).await;
-            })
-            .buffered(5);
-
-        tokio_stream::StreamExt::chunks_timeout(stream, 3, Duration::from_millis(1000 * 60 * 10))
-            .collect::<Vec<_>>()
-            .await;
-
-        tracing::info!("initial scan complete!");
-
-        // TODO: keep up with config changes. that'll require `select!` and
-        // some other task w/ `mpsc`
-
-        // when anything changes, we must scan its ENTIRE directory.
-        // see: https://github.com/notify-rs/notify/issues/412
-        while let Some(f) = file_events.recv().await {
-            if let Ok(events) = f {
-                _ = events.iter().map(|event| async {
-                    if matches!(event.kind, DebouncedEventKind::Any) {
-                        // handle folders
-                        if event.path.is_dir() {
-                            Self::handle_dir(&event.path).await;
-                        }
-
-                        // handle individual files
-                        if event.path.is_file() {
-                            Self::handle_file(&event.path).await
-                        }
-                    }
-                });
-            }
-        }
-
-        // TODO: but pretend this is here rn
-    }
-
-    #[tracing::instrument]
-    async fn handle_file(path: &Path) {
-        tracing::debug!("working on file...");
-
-        if let Err(e) = Media::update_metadata(path).await {
-            tracing::error!(
-                "Failed to update metadata for file at path `{}`. See error: `{e}`",
-                path.to_string_lossy()
-            )
-        }
-        tracing::debug!("file handled.");
-    }
-
-    #[tracing::instrument]
-    async fn handle_dir(path: &Path) {
-        tracing::debug!("starting...");
-        let mut walk_dir = async_walkdir::WalkDir::new(path);
-
-        while let Some(entry) = walk_dir.next().await {
-            if let Ok(entry) = entry {
-                Self::handle_file(&entry.path()).await;
-            }
-        }
-        tracing::debug!("all entries walked");
-    }
-}
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..4f201f7
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,12 @@
+# Tests
+
+These are the integration tests for the library. Please add some of your weirdest media to the `assets` directory and write some fun tests to check our parsers.
+
+## Running Tests
+
+Typically, `cargo nextest run` will work just fine.
+
+If you'd like to use the Tokio debugger, however, you'll need to:
+
+1. Install it: `cargo binstall --locked tokio-console`
+2. Run the tests with a special compiler flag: `RUSTFLAGS="--cfg tokio_unstable" cargo nextest run`
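Once a test is running with that flag, `tokio-console` should be able to attach to it; by default it connects to `127.0.0.1:6669`, and since the tests below each pick their own port via `Setup`, you may need to pass the address explicitly (e.g. `tokio-console http://127.0.0.1:6670`). The exact port values here are assumptions, so check the test source first.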
diff --git a/tests/assets/beach_location_and_tagged.jpg b/tests/assets/beach_location_and_tagged.jpg
new file mode 100755
index 0000000..7f34772
Binary files /dev/null and b/tests/assets/beach_location_and_tagged.jpg differ
diff --git a/tests/assets/fear.avif b/tests/assets/fear.avif
new file mode 100644
index 0000000..7007313
Binary files /dev/null and b/tests/assets/fear.avif differ
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
new file mode 100644
index 0000000..94540e1
--- /dev/null
+++ b/tests/common/mod.rs
@@ -0,0 +1,112 @@
+//! The parent of the other tests.
+//!
+//! Mostly to import the setup stuff below.
+
+use camino::Utf8PathBuf;
+
+use std::{
+    env::temp_dir,
+    net::{Ipv4Addr, SocketAddrV4},
+    str::FromStr,
+};
+
+use backdrop::{
+    config::{BugReportInfo, Config, CONFIG},
+    database,
+    error::bug_msg,
+};
+use tracing_subscriber::{filter, layer::SubscriberExt as _, util::SubscriberInitExt as _, Layer};
+use uuid::Uuid;
+
+/// One above the last-used port in `setup`.
+///
+/// Use this in the next created test.
+const _AVAILABLE_PORT: u16 = 6674;
+
+/// args for setup
+#[allow(dead_code, reason = "it's used in the other tests")]
+pub struct Setup {
+    pub port: u16,
+    pub watched_folders: Vec<Utf8PathBuf>,
+}
+
+impl Setup {
+    #[allow(dead_code, reason = "it's used in the other tests")]
+    pub fn new(port: u16) -> Self {
+        Self {
+            port,
+            watched_folders: vec!["tests/assets/".into()],
+        }
+    }
+}
+
+/// call this at the top of any new test func! :)
+#[allow(dead_code, reason = "it's used in the other tests")]
+pub async fn setup(args: Setup) {
+    // create tokio-console logger + server
+    let tokio_console_layer = console_subscriber::ConsoleLayer::builder()
+        .with_default_env()
+        .server_addr(SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), args.port))
+        .spawn();
+
+    // start logging (plus the tokio-console stuff)
+    tracing_subscriber::registry()
+        .with(tokio_console_layer)
+        .with(
+            tracing_subscriber::fmt::layer()
+                .with_filter(filter::EnvFilter::from_str("DEBUG,sqlx=INFO").unwrap()),
+        )
+        .init();
+
+    // initialize the config (required)
+    init_config_testing(&args.watched_folders).await;
+
+    // setup the database location (also required)
+    {
+        let db_temp_dir = Utf8PathBuf::try_from(temp_dir())
+            .unwrap()
+            .join(Uuid::new_v4().to_string())
+            .join("_raves_db");
+
+        tokio::fs::create_dir_all(&db_temp_dir)
+            .await
+            .expect("create db temp dir");
+
+        database::DB_FOLDER_PATH
+            .set(db_temp_dir)
+            .expect("db folder path should be unset");
+    }
+}
+
+/// Initializes the config static with testing values.
+pub async fn init_config_testing(watched_paths: &[Utf8PathBuf]) {
+    if CONFIG.get().is_none() {
+        Config::init_config(
+            watched_paths,
+            temp_dir().try_into().unwrap(),
+            temp_dir().try_into().unwrap(),
+            new_bug_report_info_testing(),
+        )
+        .await;
+    } else {
+        tracing::error!(
+            "attempted to init the config, but the config is already running. {}",
+            bug_msg().await
+        )
+    }
+}
+
+/// Sample bug report information for usage in tests, to decrease
+/// verbosity.
diff --git a/tests/db.rs b/tests/db.rs
new file mode 100644
index 0000000..581eb98
--- /dev/null
+++ b/tests/db.rs
@@ -0,0 +1,73 @@
+//! This module tests the database.
+//!
+//! In particular, it focuses on media cache generation, testing against known
+//! assets and their metadata fields.
+//!
+//! Further contributions to these tests, like weird media, metadata, or
+//! regression cases, are greatly appreciated!
+
+mod common;
+
+#[cfg(test)]
+mod tests {
+    use std::env::temp_dir;
+
+    use backdrop::{
+        database::{self, DATABASE},
+        models::media::{metadata::Format, Media},
+    };
+    use camino::{Utf8Path, Utf8PathBuf};
+    use uuid::Uuid;
+
+    /// The database can cache metadata for the beach photo.
+    #[tokio::test]
+    async fn beach() {
+        // set up the database
+        {
+            let db_temp_dir = Utf8PathBuf::try_from(temp_dir())
+                .unwrap()
+                .join(Uuid::new_v4().to_string())
+                .join("_raves_db");
+
+            tokio::fs::create_dir_all(&db_temp_dir)
+                .await
+                .expect("create db temp dir");
+
+            database::DB_FOLDER_PATH
+                .set(db_temp_dir)
+                .expect("db folder path should be unset");
+        }
+
+        // grab database connection from pool
+        let mut conn = DATABASE.acquire().await.expect("make database connection");
+
+        // ask it to cache the beach image.
+        //
+        // (loading from disk will also cache metadata into db)
+        let media = Media::load(Utf8Path::new("tests/assets/beach_location_and_tagged.jpg"))
+            .await
+            .expect("beach image should be found. (make sure you're running from crate root)");
+
+        let media_id = media.id; // TODO: remove media local and just use .id on it directly
+
+        // check if it's registered in db
+        let media_from_db = sqlx::query_as::<_, Media>("SELECT * FROM info WHERE id = $1")
+            .bind(media_id)
+            .fetch_one(&mut *conn)
+            .await
+            .expect("media should be registered in db");
+
+        // check some of the metadata
+        assert_eq!(media_from_db.id, media_id, "id match");
+        assert!(
+            media_from_db.path.contains("beach_location_and_tagged.jpg"),
+            "path contains filename"
+        );
+        assert_eq!(media_from_db.filesize, 5_194_673_i64, "filesize");
+        assert_eq!(
+            media_from_db.format.0,
+            Format::new_from_mime("image/jpeg").unwrap(),
+            "mime format"
+        );
+    }
+}
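The module docs above ask for weird media and regression cases; here's a rough template for one (everything in it is a placeholder: the `weird.heic` asset doesn't exist yet, and the assertions should target whichever fields actually regressed):

```rust
/// Placeholder regression test: drop the offending file into
/// `tests/assets/`, then assert on the metadata that used to parse wrong.
#[tokio::test]
async fn weird_heic_regression() {
    // ...same DB_FOLDER_PATH setup block as `beach` above...

    let media = Media::load(Utf8Path::new("tests/assets/weird.heic"))
        .await
        .expect("asset should exist; run from the crate root");

    // `filesize` is the cheapest sanity check; swap in the regressed fields.
    assert!(media.filesize > 0, "filesize should be read from disk");
}
```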
diff --git a/tests/file_watcher.rs b/tests/file_watcher.rs
new file mode 100644
index 0000000..a27e6f7
--- /dev/null
+++ b/tests/file_watcher.rs
@@ -0,0 +1,91 @@
+//! Tests the file watcher to ensure that it's working properly.
+
+mod common;
+
+#[cfg(test)]
+mod tests {
+    use std::{env::temp_dir, time::Duration};
+
+    use backdrop::{
+        database::{DATABASE, INFO_TABLE},
+        models::media::{metadata::Format, Media},
+        watch::Watch,
+    };
+
+    use camino::Utf8PathBuf;
+    use uuid::Uuid;
+
+    use crate::common::{self, Setup};
+
+    /// Ensures that the File Watcher doesn't immediately die... :p
+    #[tokio::test]
+    async fn watcher_alive() {
+        common::setup(Setup::new(6669)).await;
+
+        // spawn the watcher as a task
+        let task = tokio::spawn(Watch::watch());
+
+        // sleep for a bit
+        tokio::time::sleep(Duration::from_secs(5)).await;
+
+        // ensure the watcher is still running
+        assert!(!task.is_finished(), "watcher should run indefinitely!");
+
+        // kill it to stop the test lol
+        task.abort();
+    }
+
+    /// Checks that the watcher can find files.
+    ///
+    /// We'll ensure that it finds at least the `fear.avif` file.
+    #[tokio::test]
+    async fn find_file_in_temp_dir() {
+        // generate a temp dir
+        let temp_dir = Utf8PathBuf::try_from(temp_dir())
+            .unwrap()
+            .join(Uuid::new_v4().to_string());
+        println!("temp dir located at: `{temp_dir}`");
+        tokio::fs::create_dir_all(&temp_dir).await.unwrap();
+
+        // set up the app
+        common::setup(Setup {
+            port: 6670,
+            watched_folders: [temp_dir.clone()].into(),
+        })
+        .await;
+        let mut conn = DATABASE.acquire().await.unwrap();
+
+        // turn on the file watcher
+        tokio::spawn(Watch::watch());
+
+        // wipe the `info` table
+        sqlx::query(&format!("DELETE FROM {INFO_TABLE}"))
+            .execute(&mut *conn)
+            .await
+            .expect("remove all from info table");
+
+        // copy a photo to the temp dir
+        tokio::time::sleep(Duration::from_secs(3)).await;
+        tokio::fs::copy("tests/assets/fear.avif", temp_dir.join("fear.avif"))
+            .await
+            .expect("copy to temp dir should work");
+
+        // wait... then check if we got metadata!
+        tokio::time::sleep(Duration::from_secs(5)).await;
+        let media = sqlx::query_as::<_, Media>(&format!("SELECT * FROM {INFO_TABLE}"))
+            .fetch_one(&mut *conn)
+            .await
+            .expect("should find media after adding it");
+
+        assert!(
+            media.path.contains("fear.avif"),
+            "path should contain og filename. was: {}",
+            media.path
+        );
+        assert_eq!(
+            media.format.0,
+            Format::new_from_mime("image/avif").unwrap(),
+            "media mime ty should match"
+        );
+    }
+}
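The fixed `sleep(3)`/`sleep(5)` calls above are the likeliest source of flakes on slow CI runners. If they ever bite, one option is to poll with a deadline instead of sleeping once; a sketch (the `eventually` helper and its timing are invented here, not part of the library):

```rust
use std::time::Duration;

/// Polls an async condition until it holds or the attempts run out.
async fn eventually<F, Fut>(mut check: F, attempts: u32) -> bool
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = bool>,
{
    for _ in 0..attempts {
        if check().await {
            return true;
        }
        // back off briefly between polls instead of one long sleep
        tokio::time::sleep(Duration::from_millis(250)).await;
    }
    false
}
```

The final five-second wait would then become an `eventually(...)` call that queries the `info` table each round and returns as soon as the watcher catches up, rather than always paying the full timeout.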
diff --git a/tests/hash.rs b/tests/hash.rs
new file mode 100644
index 0000000..1f05d04
--- /dev/null
+++ b/tests/hash.rs
@@ -0,0 +1,85 @@
+mod common;
+
+#[cfg(test)]
+mod tests {
+    use backdrop::{
+        database::DATABASE,
+        models::media::{hash::MediaHash, Media},
+    };
+
+    use crate::common::{setup, Setup};
+
+    #[tokio::test]
+    async fn beach_hash() {
+        // perform setup
+        setup(Setup::new(6672)).await;
+        tracing::info!("post setup");
+
+        // hashing the beach file should give the same one each time
+        let beach_hash = MediaHash::hash_file("tests/assets/beach_location_and_tagged.jpg")
+            .await
+            .expect("hashing should go ok");
+        tracing::info!("post beach_hash");
+
+        // load media and grab its hash (to check if they're the same)
+        let beach_media = Media::load("tests/assets/beach_location_and_tagged.jpg")
+            .await
+            .expect("load media from disk");
+        tracing::info!("post beach_media");
+
+        // hash the media
+        let (beach_media_hash, _) = beach_media
+            .hash()
+            .await
+            .expect("media file hashing should work too");
+        tracing::info!("post beach_media_hash");
+        assert_eq!(
+            *beach_hash.as_bytes(),
+            *beach_media_hash.hash,
+            "hash_file hash + media file hash"
+        );
+
+        beach_media_hash
+            .add_to_table()
+            .await
+            .expect("add hash to hashes table");
+        tracing::info!("post db insertion");
+
+        // grabbing that from the db should yield the same hash back!
+        let mut conn = DATABASE.acquire().await.unwrap();
+        let from_database =
+            sqlx::query_as::<_, MediaHash>("SELECT * FROM hashes WHERE media_id = $1")
+                .bind(beach_media.id)
+                .fetch_one(&mut *conn)
+                .await
+                .unwrap();
+        tracing::info!("post db query");
+
+        // the initial hash + db hash should be equal!
+        assert_eq!(
+            *beach_hash.as_bytes(),
+            *from_database.hash,
+            "hash_file hash + db hash are the same"
+        );
+    }
+
+    #[tokio::test]
+    async fn hardcoded_hash() {
+        setup(Setup::new(6673)).await;
+
+        const FEAR_AVIF_HASH: [u8; 32] = [
+            0xf8, 0xc, 0xa1, 0x56, 0x78, 0xa3, 0x16, 0xe8, 0x29, 0xa5, 0xd4, 0x9e, 0x1a, 0xad,
+            0x9b, 0xdc, 0x66, 0xb6, 0xa1, 0xa2, 0xe6, 0x2a, 0xac, 0xc3, 0x47, 0xfe, 0xba, 0x71,
+            0x15, 0xec, 0xd5, 0x2c,
+        ];
+
+        // hash the file
+        let media = Media::load("tests/assets/fear.avif").await.unwrap();
+        let hash = media.hash().await.unwrap().0;
+
+        assert_eq!(
+            FEAR_AVIF_HASH, *hash.hash,
+            "hardcoded hash is eq to runtime one."
+        );
+    }
+}
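If `fear.avif` is ever re-encoded, `FEAR_AVIF_HASH` has to be regenerated to match. A throwaway snippet for that, reusing `MediaHash::hash_file` from the test above (the `#[ignore]`d test is just a convenient place to park it, and the port continues the convention from the earlier sketch):

```rust
/// Prints the current hash of `fear.avif` so `FEAR_AVIF_HASH` can be
/// updated. Run with `cargo test print_fear_hash -- --ignored`.
#[tokio::test]
#[ignore = "only for regenerating the hardcoded hash"]
async fn print_fear_hash() {
    setup(Setup::new(6675)).await;

    let hash = MediaHash::hash_file("tests/assets/fear.avif")
        .await
        .expect("hashing should go ok");

    // `{:#x?}` prints the bytes in the same `0x..` style as the constant.
    println!("{:#x?}", hash.as_bytes());
}
```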