Skip to content

Commit

Permalink
feat: store attachments under a shorter path (#259)
Browse files Browse the repository at this point in the history
* Attachments are now stored under
  `<data_dir>/gurk/files/year-month-day/`. It is easier to open a
  shorter path from terminal. Also they are nicely grouped under the
  `files` folder.
* If an attachment has a file name, it is used when saving it; otherwise
  the first 8 chars of its hex digest are used.
* All MIME types given in the content type field are supported now. In
  particular, extensions are resolved for all content types.

Also upgrade presage which now verifies the digest of downloaded
attachments.
  • Loading branch information
boxdot authored Dec 22, 2023
1 parent 21c72c1 commit 0b08486
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 59 deletions.
16 changes: 13 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 27 additions & 24 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,58 +28,61 @@ debug = true
dev = ["prost", "base64"]

[dependencies]
presage = { git = "https://github.com/whisperfish/presage", rev = "2d9a02b" }
presage-store-sled = { git = "https://github.com/whisperfish/presage", rev = "2d9a02b" }
presage = { git = "https://github.com/whisperfish/presage", rev = "6d7003f" }
presage-store-sled = { git = "https://github.com/whisperfish/presage", rev = "6d7003f" }

# dev feature dependencies
prost = { version = "0.10.4", optional = true }
base64 = { version = "0.13.1", optional = true }

aho-corasick = "0.7.19"
anyhow = "1.0.66"
arboard = { version = "3.2.0", features = ["wayland-data-control"] }
async-trait = "0.1.58"
chrono = { version = "0.4.22", default-features = false, features = ["serde"] }
clap = { version = "4.0.19", features = ["derive"] }
crossterm = { version = "0.27.0", features = ["event-stream"] }
derivative = "2.2.0"
dirs = "4.0.0"
emojis = "0.6.1"
futures-channel = "0.3.25"
hex = "0.4.3"
hostname = "0.3.1"
image = { version = "0.24.6", default-features = false, features = ["png"] }
itertools = "0.10.5"
log-panics = "2.1.0"
mime_guess = "2.0.4"
notify-rust = "4.5.10"
once_cell = "1.18.0"
opener = "0.5.0"
phonenumber = "0.3.1"
postcard = { version = "1.0.4", features = ["alloc"] }
qr2term = { git = "https://github.com/boxdot/qr2term-rs", rev = "ed8ae7f" }
ratatui = "0.23.0"
rayon = "1.7.0"
regex = "1.9.5"
regex-automata = "0.1.10"
scopeguard = "1.1.0"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.87"
sha2 = "0.10.8"
sqlx = { version = "0.7.2", features = ["sqlite", "runtime-tokio-rustls", "uuid", "chrono"] }
textwrap = "0.16.0"
thiserror = "1.0.40"
thread_local = "1.1.7"
tokio = { version = "1.21.2", default-features = false, features = ["rt-multi-thread", "macros", "net", "time"] }
tokio-stream = "0.1.11"
toml = "0.5.9"
unicode-width = "0.1.10"
uuid = { version = "1.2", features = ["v4"] }
whoami = "1.2.3"
tracing = "0.1.37"
tracing-appender = "0.2.2"
tracing-subscriber = "0.3.16"
futures-channel = "0.3.25"
qr2term = { git = "https://github.com/boxdot/qr2term-rs", rev = "ed8ae7f" }
clap = { version = "4.0.19", features = ["derive"] }
aho-corasick = "0.7.19"
sqlx = { version = "0.7.2", features = ["sqlite", "runtime-tokio-rustls", "uuid", "chrono"] }
thiserror = "1.0.40"
postcard = { version = "1.0.4", features = ["alloc"] }
emojis = "0.6.1"

# dev feature dependencies
prost = { version = "0.10.4", optional = true }
base64 = { version = "0.13.1", optional = true }
arboard = { version = "3.2.0", features = ["wayland-data-control"] }
rayon = "1.7.0"
thread_local = "1.1.7"
ratatui = "0.23.0"
regex = "1.9.5"
once_cell = "1.18.0"
unicode-width = "0.1.10"
uuid = { version = "1.2", features = ["v4"] }
whoami = "1.2.3"

[dev-dependencies]
criterion = { version = "0.5", features = ["async_tokio", "html_reports"] }
hex-literal = "0.4.1"
insta = { version = "1.21.0", features = ["json"] }
quickcheck = "1.0.3"
quickcheck_macros = "1.0.0"
Expand All @@ -89,7 +92,7 @@ tempfile = "3.3.0"
name = "app"
harness = false

# [patch."https://github.com/whisperfish/presage.git"]
[patch."https://github.com/whisperfish/presage.git"]
# presage = { path = "../presage/presage" }
# presage-store-sled = { path = "../presage/presage-store-sled" }
#
Expand Down
204 changes: 204 additions & 0 deletions src/signal/attachment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
use std::path::{Path, PathBuf};

use anyhow::Context;
use chrono::Local;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, IMAGE_JPEG};
use mime_guess::{get_mime_extensions, Mime};
use presage::proto::AttachmentPointer;
use tracing::info;

use crate::signal::Attachment;
use crate::util::utc_timestamp_msec_to_local;

/// Number of leading digest bytes that are hex-encoded to build the file name of an attachment
/// that has no file name of its own.
const DIGEST_BYTES_LEN: usize = 4;

pub(super) fn save(
data_dir: impl AsRef<Path>,
pointer: AttachmentPointer,
data: Vec<u8>,
) -> anyhow::Result<Attachment> {
let base_dir = data_dir.as_ref().join("files");

let digest = pointer
.digest
.as_deref()
.context("dropping attachment without digest")?;
let digest_hex = hex::encode(digest);

let mime: Mime = pointer
.content_type()
.parse()
.unwrap_or(APPLICATION_OCTET_STREAM);

let name = derive_name(&pointer, digest, &mime);

let date = pointer
.upload_timestamp
.map(utc_timestamp_msec_to_local)
.unwrap_or_else(Local::now)
.date_naive();
let filedir = base_dir.join(date.to_string());
let filepath = conflict_free_filename(&filedir, name);

std::fs::create_dir_all(&filedir)
.with_context(|| format!("failed to create dir: {}", filedir.display()))?;
std::fs::write(&filepath, data)
.with_context(|| format!("failed to save attachment at: {}", filepath.display()))?;

info!(dest =% filepath.display(), "saved attachment");

Ok(Attachment {
id: digest_hex,
content_type: mime.to_string(),
filename: filepath,
size: pointer.size.unwrap_or_default(),
})
}

/// Returns a path inside `filedir` for `name` that does not exist yet.
///
/// When `filedir/name` is free it is returned unchanged. Otherwise a counter starting at 1 is
/// inserted before the extension (`stem.1.ext`, `stem.2.ext`, ...), or appended to the whole
/// name (`name.1`, ...) when the name has no stem/extension pair, until an unused path is found.
fn conflict_free_filename(filedir: &Path, name: String) -> PathBuf {
    let first_choice = filedir.join(&name);
    if !first_choice.exists() {
        return first_choice;
    }

    // The stem/extension split does not depend on the counter, so compute it once.
    let original = Path::new(&name);
    let split = original
        .file_stem()
        .zip(original.extension())
        .map(|(stem, extension)| (stem.to_string_lossy(), extension.to_string_lossy()));

    let mut counter = 0;
    loop {
        counter += 1;
        let numbered = match &split {
            Some((stem, extension)) => format!("{stem}.{counter}.{extension}"),
            None => format!("{name}.{counter}"),
        };
        let candidate = filedir.join(numbered);
        if !candidate.exists() {
            return candidate;
        }
    }
}

fn derive_name(pointer: &AttachmentPointer, digest: &[u8], mime: &Mime) -> String {
pointer.file_name.clone().unwrap_or_else(|| {
let mut name = hex::encode(&digest[..DIGEST_BYTES_LEN]);
let extension = if mime == &IMAGE_JPEG {
// special case due to: <https://github.com/abonander/mime_guess/issues/59>
Some("jpeg")
} else if mime == &APPLICATION_OCTET_STREAM {
None
} else {
get_mime_extensions(mime).and_then(|extensions| extensions.first().copied())
};
if let Some(extension) = extension {
name.push('.');
name.push_str(extension);
};
name
})
}

#[cfg(test)]
mod tests {
    use hex_literal::hex;

    use super::*;

    /// Builds an attachment pointer with the given fields set and a fixed size of 42.
    fn attachment_pointer(
        content_type: &str,
        digest: &[u8],
        file_name: Option<&str>,
        upload_timestamp: u64,
    ) -> AttachmentPointer {
        AttachmentPointer {
            content_type: Some(content_type.into()),
            digest: Some(digest.into()),
            file_name: file_name.map(|s| s.to_owned()),
            upload_timestamp: Some(upload_timestamp),
            key: None,
            size: Some(42),
            thumbnail: None,
            incremental_digest: None,
            incremental_mac_chunk_size: None,
            flags: None,
            width: None,
            height: None,
            caption: None,
            blur_hash: None,
            cdn_number: None,
            attachment_identifier: None,
        }
    }

    #[test]
    fn test_save() {
        let tempdir = tempfile::tempdir().unwrap();

        let digest = hex!("d51e9a355d4351ae5fbf2846d18bb384471555aa0ea6ee9075eb63f99ecddf77");
        let upload_timestamp = 1703160458 * 1000;

        // `save` names the directory after the *local* date of the upload timestamp. Derive the
        // expected directory the same way, so the test does not depend on the time zone of the
        // machine running it (the timestamp is 2023-12-21 in UTC but already 2023-12-22 in
        // zones at UTC+12 and beyond).
        let date = utc_timestamp_msec_to_local(upload_timestamp).date_naive();
        let filedir = tempdir.path().join("files").join(date.to_string());

        let attachment = save(
            tempdir.path(),
            attachment_pointer("image/jpeg", &digest, Some("image.jpeg"), upload_timestamp),
            vec![42],
        )
        .unwrap();

        assert_eq!(attachment.id, hex::encode(digest));
        assert_eq!(attachment.content_type, "image/jpeg");
        assert_eq!(attachment.size, 42);
        assert_eq!(attachment.filename, filedir.join("image.jpeg"));

        assert_eq!(std::fs::read(attachment.filename).unwrap(), &[42]);

        // duplicate
        let attachment = save(
            tempdir.path(),
            attachment_pointer("image/jpeg", &digest, Some("image.jpeg"), upload_timestamp),
            vec![42],
        )
        .unwrap();
        assert_eq!(attachment.filename, filedir.join("image.1.jpeg"));

        // without name
        let attachment = save(
            tempdir.path(),
            attachment_pointer("image/jpeg", &digest, None, upload_timestamp),
            vec![42],
        )
        .unwrap();
        assert_eq!(attachment.filename, filedir.join("d51e9a35.jpeg"));

        // without name and mime octet-stream
        let attachment = save(
            tempdir.path(),
            attachment_pointer("application/octet-stream", &digest, None, upload_timestamp),
            vec![42],
        )
        .unwrap();
        assert_eq!(attachment.filename, filedir.join("d51e9a35"));

        // without name and mime pdf
        let attachment = save(
            tempdir.path(),
            attachment_pointer("application/pdf", &digest, None, upload_timestamp),
            vec![42],
        )
        .unwrap();
        assert_eq!(attachment.filename, filedir.join("d51e9a35.pdf"));
    }
}
Loading

0 comments on commit 0b08486

Please sign in to comment.