From 3ed4442e0ed0906797aeeb7a623f23a6ef07e160 Mon Sep 17 00:00:00 2001 From: William Date: Sun, 1 Oct 2023 18:01:47 +0100 Subject: [PATCH] Try and be more forgiving to entry paths that are not valid UTF-8 --- .vscode/settings.json | 3 +- Cargo.lock | 1 + fastgmad-lib/Cargo.toml | 3 + fastgmad-lib/src/extract/mod.rs | 106 +++++++++++++++++++++++--------- fastgmad-lib/src/util.rs | 25 +++++++- 5 files changed, 105 insertions(+), 33 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 98e490e..ba6617b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { - "rust-analyzer.cargo.features": "all" + "rust-analyzer.cargo.features": "all", + "editor.formatOnSave": true } \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 467702d..4f5987e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -186,6 +186,7 @@ dependencies = [ "thiserror", "uuid", "walkdir", + "winapi", "zip", ] diff --git a/fastgmad-lib/Cargo.toml b/fastgmad-lib/Cargo.toml index c03e4d1..ad0698d 100644 --- a/fastgmad-lib/Cargo.toml +++ b/fastgmad-lib/Cargo.toml @@ -24,6 +24,9 @@ steamworks = { version = "0.9", optional = true } libloading = { version = "0.8", optional = true } thiserror = "1" +[target.'cfg(windows)'.dependencies] +winapi = { version = "0.3", features = ["stringapiset"] } + [dev-dependencies] lazy_static = "1" sysreq = "0.1.6" diff --git a/fastgmad-lib/src/extract/mod.rs b/fastgmad-lib/src/extract/mod.rs index 6d29eff..0120192 100644 --- a/fastgmad-lib/src/extract/mod.rs +++ b/fastgmad-lib/src/extract/mod.rs @@ -28,7 +28,11 @@ pub fn extract_gma(conf: &ExtractGmaConfig, r: &mut (impl BufRead + IoSkip)) -> } #[cfg(feature = "binary")] -pub fn extract_gma_with_done_callback(conf: &ExtractGmaConfig, r: &mut (impl BufRead + IoSkip), done_callback: &mut dyn FnMut()) -> Result<(), FastGmadError> { +pub fn extract_gma_with_done_callback( + conf: &ExtractGmaConfig, + r: &mut (impl BufRead + IoSkip), + done_callback: &mut dyn FnMut(), +) -> 
Result<(), FastGmadError> { if conf.max_io_threads.get() == 1 { StandardExtractGma::extract_gma_with_done_callback(conf, r, done_callback) } else { @@ -37,7 +41,11 @@ pub fn extract_gma_with_done_callback(conf: &ExtractGmaConfig, r: &mut (impl Buf } trait ExtractGma { - fn extract_gma_with_done_callback(conf: &ExtractGmaConfig, r: &mut (impl BufRead + IoSkip), done_callback: &mut dyn FnMut()) -> Result<(), FastGmadError> { + fn extract_gma_with_done_callback( + conf: &ExtractGmaConfig, + r: &mut (impl BufRead + IoSkip), + done_callback: &mut dyn FnMut(), + ) -> Result<(), FastGmadError> { if conf.out.is_dir() { log::warn!( "Output directory already exists; files not present in this GMA but present in the existing output directory will NOT be deleted" @@ -181,18 +189,19 @@ trait ExtractGma { let size = r .read_i64::() .map_err(|error| fastgmad_io_error!(while "reading entry size", error: error))?; + let _crc = r .read_u32::() .map_err(|error| fastgmad_io_error!(while "reading entry CRC", error: error))?; - if let Some(entry) = GmaEntry::try_new(&conf.out, path, size) { - #[cfg(feature = "binary")] - { - total_size += entry.size as u64; - } + let entry = GmaEntry::new(&conf.out, path, size)?; - file_index.push(entry); + #[cfg(feature = "binary")] + { + total_size += entry.size as u64; } + + file_index.push(entry); } // File contents @@ -246,7 +255,8 @@ impl ExtractGma for StandardExtractGma { Some(path) => path, None => { // Skip past the entry if we couldn't get a path for it - r.skip(*size as u64).map_err(|error| fastgmad_io_error!(while "skipping past GMA entry data", error: error))?; + r.skip(*size as u64) + .map_err(|error| fastgmad_io_error!(while "skipping past GMA entry data", error: error))?; continue; } }; @@ -310,7 +320,8 @@ impl ExtractGma for ParallelExtractGma { Some(path) => path, None => { // Skip past the entry if we couldn't get a path for it - r.skip(*size as u64).map_err(|error| fastgmad_io_error!(while "skipping past GMA entry data", error: 
error))?; + r.skip(*size as u64) + .map_err(|error| fastgmad_io_error!(while "skipping past GMA entry data", error: error))?; continue; } }; @@ -404,34 +415,69 @@ struct GmaEntry { size: usize, } impl GmaEntry { - fn try_new(base_path: &Path, path: Vec, size: i64) -> Option { + fn new(base_path: &Path, path: Vec, size: i64) -> Result { + let path = { + #[cfg(unix)] + { + use std::{ffi::OsString, os::unix::ffi::OsStringExt}; + let path = OsString::from_vec(path); + Some(path) + } + #[cfg(windows)] + { + use std::{ffi::OsString, os::windows::ffi::OsStringExt}; + match crate::util::ansi_to_wide(&path) { + Ok(path) => Some(OsString::from_wide(&path)), + Err(err) => { + log::info!( + "warning: skipping GMA entry with incompatible file path: {:?} ({})", + String::from_utf8_lossy(&path), + err + ); + None + } + } + } + #[cfg(not(any(unix, windows)))] + { + match String::from_utf8(path) { + Ok(path) => Some(PathBuf::from(path)), + Err(err) => { + log::info!( + "warning: skipping GMA entry with non-UTF-8 file path: {:?}", + String::from_utf8_lossy(err.as_bytes()) + ); + None + } + } + } + }; + let size = match usize::try_from(size) { Ok(size) => size, Err(_) => { - log::warn!("Skipping GMA entry with unsupported file size ({size} bytes): {path:?}"); - return None; + let error = std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsupported file size for this system ({size} bytes > max {} bytes)", usize::MAX), + ); + if let Some(path) = path { + return Err(fastgmad_io_error!(while "reading GMA entry size", error: error, path: path)); + } else { + return Err(fastgmad_io_error!(while "reading GMA entry size", error: error)); + } } }; - let path = match String::from_utf8(path) { - Ok(path) => Some(path).and_then(|path| { - let path = Path::new(&path); - if path.components().any(|c| matches!(c, Component::ParentDir | Component::Prefix(_))) { - log::warn!("Skipping GMA entry with invalid file path: {:?}", path); - None - } else { - Some(base_path.join(path)) - 
} - }), - Err(err) => { - log::info!( - "warning: skipping GMA entry with non-UTF-8 file path: {:?}", - String::from_utf8_lossy(err.as_bytes()) - ); + let path = path.and_then(|path| { + let path = Path::new(&path); + if path.components().any(|c| matches!(c, Component::ParentDir | Component::Prefix(_))) { + log::warn!("Skipping GMA entry with invalid file path: {:?}", path); None + } else { + Some(base_path.join(path)) } - }; + }); - Some(Self { path, size }) + Ok(Self { path, size }) } } diff --git a/fastgmad-lib/src/util.rs b/fastgmad-lib/src/util.rs index a3959b4..f95c8d3 100644 --- a/fastgmad-lib/src/util.rs +++ b/fastgmad-lib/src/util.rs @@ -1,6 +1,7 @@ use std::{ - io::{BufRead, Write, Seek, SeekFrom, StdinLock, BufReader}, - path::Path, fs::File, + fs::File, + io::{BufRead, BufReader, Seek, SeekFrom, StdinLock, Write}, + path::Path, }; pub fn is_hidden_file(path: &Path) -> Result { @@ -106,6 +107,26 @@ impl IoSkip for StdinLock<'_> { } } +#[cfg(windows)] +pub fn ansi_to_wide(ansi: &[u8]) -> Result, std::io::Error> { + use winapi::um::{stringapiset::MultiByteToWideChar, winnls::CP_ACP}; + + // Get the required buffer size. + let required_size = unsafe { MultiByteToWideChar(CP_ACP, 0, ansi.as_ptr() as *const i8, ansi.len() as i32, core::ptr::null_mut(), 0) }; + if required_size == 0 { + return Err(std::io::Error::last_os_error()); + } + + // Convert the ANSI string to wide string. + let mut wide = vec![0u16; required_size as usize]; + let ret = unsafe { MultiByteToWideChar(CP_ACP, 0, ansi.as_ptr() as *const i8, ansi.len() as i32, wide.as_mut_ptr(), required_size) }; + if ret == 0 { + return Err(std::io::Error::last_os_error()); + } + + Ok(wide) +} + #[cfg(feature = "binary")] mod binary { use super::*;