diff --git a/Cargo.lock b/Cargo.lock
index 9e40c47..f9c34b8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -482,7 +482,7 @@ dependencies = [
 
 [[package]]
 name = "factoria"
-version = "0.1.0"
+version = "1.0.0"
 dependencies = [
  "bzip",
  "chrono",
@@ -495,6 +495,8 @@ dependencies = [
  "memmap2",
  "number_prefix",
  "rayon",
+ "serde",
+ "serde_json",
  "term_size",
  "themelios-archive",
  "tracing",
@@ -1134,6 +1136,7 @@ version = "1.0.104"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
 dependencies = [
+ "indexmap",
  "itoa",
  "ryu",
  "serde",
diff --git a/book/src/factoria.md b/book/src/factoria.md
index 284fb02..00cb7b3 100644
--- a/book/src/factoria.md
+++ b/book/src/factoria.md
@@ -30,4 +30,4 @@ benefit in creating or editing archives compared to using [LB-ARK](./lb-ark.md).
   there's no risk of accidentally leaking deleted file data, but the space it
   previously occupied is still there, as well as other evidence of the edit
   history. Before publishing an archive, it is therefore recommended to use the
-  `defrag` subcommand to eliminate this unused space.
+  `rebuild` subcommand to eliminate this unused space.
diff --git a/factoria/Cargo.toml b/factoria/Cargo.toml
index 8f6a11c..36d0df9 100644
--- a/factoria/Cargo.toml
+++ b/factoria/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "factoria"
-version = "0.1.0"
+version = "1.0.0"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -24,3 +24,6 @@ tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
 tracing-error = "0.2.0"
 indicatif = { version = "0.17.3", features = ["rayon"] }
 filetime = "0.2.22"
+
+serde = { version = "1.0", features = ["derive"] }
+serde_json = { version = "1.0", features = ["preserve_order"] }
diff --git a/factoria/src/create.rs b/factoria/src/create.rs
new file mode 100644
index 0000000..ac734cb
--- /dev/null
+++ b/factoria/src/create.rs
@@ -0,0 +1,198 @@
+use std::collections::BTreeMap;
+use std::path::{Path, PathBuf};
+use std::io::{prelude::*, SeekFrom};
+use std::time::SystemTime;
+
+use clap::ValueHint;
+use serde::de::{self, Deserialize};
+use eyre_span::emit;
+
+use themelios_archive::dirdat::{self, DirEntry, Name};
+
+#[derive(Debug, Clone, clap::Args)]
+#[command(arg_required_else_help = true)]
+pub struct Command {
+	/// Directory to place resulting .dir/.dat in
+	#[clap(long, short, value_hint = ValueHint::DirPath)]
+	output: Option<PathBuf>,
+
+	/// The .json indexes to reconstruct
+	#[clap(value_hint = ValueHint::FilePath, required = true)]
+	json_file: Vec<PathBuf>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+struct FileId(u16);
+
+#[derive(Debug, Clone, serde::Deserialize)]
+#[serde(remote = "Entry")]
+struct Entry {
+	path: Option<PathBuf>,
+	name: Option<String>,
+	#[serde(default, deserialize_with="parse_compress_mode")]
+	compress: Option<bzip::CompressMode>,
+	reserve: Option<usize>,
+	#[serde(default)]
+	unknown1: u32,
+	#[serde(default)]
+	unknown2: usize,
+}
+
+pub fn run(cmd: &Command) -> eyre::Result<()> {
+	for json_file in &cmd.json_file {
+		emit(create(cmd, json_file));
+	}
+	Ok(())
+}
+
+#[tracing::instrument(skip_all, fields(path=%json_file.display(), out))]
+fn create(cmd: &Command, json_file: &Path) -> eyre::Result<()> {
+	let json: BTreeMap<FileId, Option<Entry>>
+		= serde_json::from_reader(std::fs::File::open(json_file)?)?;
+
+	let out_dir = cmd.output.as_ref()
+		.map_or_else(|| json_file.parent().unwrap(), |v| v.as_path())
+		.join(json_file.file_name().unwrap())
+		.with_extension("dir");
+
+	tracing::Span::current().record("out", tracing::field::display(out_dir.display()));
+	std::fs::create_dir_all(out_dir.parent().unwrap())?;
+
+	let size = json.last_key_value().map(|a| a.0.0 + 1).unwrap_or_default() as usize;
+	let mut entries = vec![None; size];
+	for (k, v) in json {
+		entries[k.0 as usize] = v
+	}
+
+	// TODO lots of duplicated code between here and rebuild
+
+	let mut out_dat = std::fs::File::create(out_dir.with_extension("dat.tmp"))?;
+	out_dat.write_all(b"LB DAT\x1A\0")?;
+	out_dat.write_all(&u64::to_le_bytes(size as u64))?;
+	for _ in 0..=size {
+		out_dat.write_all(&u32::to_le_bytes(0))?;
+	}
+
+	let mut dir = Vec::with_capacity(size);
+	for (id, e) in entries.into_iter().enumerate() {
+		let mut ent = DirEntry::default();
+		if let Some(e) = e {
+			let name = match &e {
+				Entry { name: Some(name), .. } => name.as_str(),
+				Entry { path: Some(path), .. } => path.file_name().unwrap().to_str().unwrap(),
+				_ => unreachable!()
+			};
+			let _span = tracing::info_span!("file", name=%name, path=tracing::field::Empty).entered();
+			ent.name = Name::try_from(name)?;
+			ent.unk1 = e.unknown1;
+			ent.unk2 = e.unknown2;
+
+			let pos = out_dat.seek(SeekFrom::End(0))?;
+			ent.offset = pos as usize;
+
+			if let Some(path) = &e.path {
+				let path = json_file.parent().unwrap().join(path);
+				_span.record("path", tracing::field::display(path.display()));
+
+				let data = std::fs::read(&path)?;
+				let mut data = match e.compress {
+					Some(method) => bzip::compress_ed6_to_vec(&data, method),
+					None => data,
+				};
+				ent.size = data.len();
+				ent.reserved_size = e.reserve.unwrap_or(data.len());
+
+				while data.len() < e.reserve.unwrap_or(0) {
+					data.push(0);
+				}
+				out_dat.write_all(&data)?;
+
+				let timestamp = std::fs::metadata(path)?
+					.modified()
+					.unwrap_or_else(|_| SystemTime::now());
+				ent.timestamp = timestamp.duration_since(SystemTime::UNIX_EPOCH)?.as_secs() as u32;
+			}
+
+			let pos2 = out_dat.seek(SeekFrom::End(0))?;
+			out_dat.seek(SeekFrom::Start(16 + 4 * id as u64))?;
+			out_dat.write_all(&u32::to_le_bytes(pos as u32))?;
+			out_dat.write_all(&u32::to_le_bytes(pos2 as u32))?;
+		}
+		dir.push(ent)
+	}
+
+	std::fs::rename(out_dir.with_extension("dat.tmp"), out_dir.with_extension("dat"))?;
+	std::fs::write(&out_dir, dirdat::write_dir(&dir))?;
+
+	tracing::info!("created");
+
+	Ok(())
+}
+
+fn parse_compress_mode<'de, D: serde::Deserializer<'de>>(des: D) -> Result<Option<bzip::CompressMode>, D::Error> {
+	match <Option<u8>>::deserialize(des)? {
+		Some(1) => Ok(Some(bzip::CompressMode::Mode1)),
+		Some(2) => Ok(Some(bzip::CompressMode::Mode2)),
+		None => Ok(None),
+		Some(v) => Err(de::Error::invalid_value(
+			de::Unexpected::Unsigned(v as _),
+			&"1, 2, or null"),
+		),
+	}
+}
+
+impl std::str::FromStr for Entry {
+	type Err = std::convert::Infallible;
+
+	fn from_str(s: &str) -> Result<Self, Self::Err> {
+		Ok(Entry {
+			path: Some(PathBuf::from(s)),
+			name: None,
+			compress: None,
+			reserve: None,
+			unknown1: 0,
+			unknown2: 0,
+		})
+	}
+}
+
+impl<'de> Deserialize<'de> for Entry {
+	fn deserialize<D: serde::Deserializer<'de>>(des: D) -> Result<Self, D::Error> {
+		struct V;
+		impl<'de> de::Visitor<'de> for V {
+			type Value = Entry;
+
+			fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+				formatter.write_str("string or map")
+			}
+
+			fn visit_str<E: de::Error>(self, value: &str) -> Result<Self::Value, E> {
+				std::str::FromStr::from_str(value).map_err(de::Error::custom)
+			}
+
+			fn visit_map<M: de::MapAccess<'de>>(self, map: M) -> Result<Self::Value, M::Error> {
+				Entry::deserialize(de::value::MapAccessDeserializer::new(map))
+			}
+		}
+
+		let v = des.deserialize_any(V)?;
+		if v.path.is_none() && v.name.is_none() {
+			return Err(de::Error::custom("at least one of `path` and `name` must be present"))
+		}
+		Ok(v)
+	}
+}
+
+impl<'de> Deserialize<'de> for FileId {
+	fn deserialize<D: serde::Deserializer<'de>>(des: D) -> Result<Self, D::Error> {
+		let s = String::deserialize(des)?;
+		let err = || de::Error::invalid_value(
+			de::Unexpected::Str(&s),
+			&"a hexadecimal number",
+		);
+
+		let s = s.strip_prefix("0x").ok_or_else(err)?;
+		let v = u32::from_str_radix(s, 16).map_err(|_| err())?;
+		Ok(FileId(v as u16))
+	}
+}
diff --git a/factoria/src/index.rs b/factoria/src/index.rs
new file mode 100644
index 0000000..ad261d6
--- /dev/null
+++ b/factoria/src/index.rs
@@ -0,0 +1,222 @@
+use std::path::{Path, PathBuf};
+use std::io::{self, BufWriter, Write};
+
+use clap::ValueHint;
+use serde::Serialize;
+use serde_json::Value;
+use eyre_span::emit;
+
+use themelios_archive::dirdat::{self, DirEntry, Name};
+
+#[derive(Debug, Clone, clap::Args)]
+#[command(arg_required_else_help = true)]
+/// Produces a json file listing all the files in an archive.
+///
+/// Combined with the `extract` command, this is enough for `create` to recreate an identical archive.
+///
+/// Note that while this writes file ids with eight hex digits, only the lower four are used when reconstructing.
+pub struct Command {
+	/// Do not attempt to infer compression mode.
+	///
+	/// Useful when extracted with `extract -C`.
+	#[clap(short='C', long)]
+	compressed: bool,
+
+	/// Directory to place the resulting json file in.
+	///
+	/// This is *not* the path of the actual file, for consistency with the `extract` command.
+	/// As a special case, if this is `-`, the json is written to stdout.
+	#[clap(long, short, value_hint = ValueHint::DirPath)]
+	output: Option<PathBuf>,
+
+	/// The .dir files to create indexes for
+	#[clap(value_hint = ValueHint::FilePath, required = true)]
+	dir_file: Vec<PathBuf>,
+}
+
+pub fn run(cmd: &Command) -> eyre::Result<()> {
+	for dir_file in &cmd.dir_file {
+		emit(index(cmd, dir_file));
+	}
+	Ok(())
+}
+
+#[tracing::instrument(skip_all, fields(path=%dir_file.display(), out))]
+fn index(cmd: &Command, dir_file: &Path) -> eyre::Result<()> {
+	let dir = dirdat::read_dir(&std::fs::read(dir_file)?)?;
+	let dat = if !cmd.compressed {
+		Some(crate::util::mmap(&dir_file.with_extension("dat"))?)
+	} else {
+		None
+	};
+	let dat = dat.as_deref();
+	let archive_number = crate::list::get_archive_number(dir_file);
+
+	let json = dir.iter().enumerate().map(|(id, ent)| {
+		let _span = tracing::debug_span!("index_file", id=%format_args!("{id:04X}"), name=%ent.name).entered();
+		let mut key = String::from("0x");
+		if let Some(archive_number) = archive_number {
+			key.push_str(&format!("{:04X}", archive_number));
+		}
+		key.push_str(&format!("{:04X}", id));
+
+		(key, index_file(ent, dir_file, dat))
+	}).collect::<serde_json::Map<String, Value>>();
+
+	let out = if cmd.output.as_ref().is_some_and(|a| a == Path::new("-")) {
+		tracing::Span::current().record("out", tracing::field::display("stdout"));
+		Box::new(std::io::stdout().lock()) as Box<dyn Write>
+	} else {
+		let out = cmd.output.as_ref()
+			.map_or_else(|| dir_file.parent().unwrap(), |v| v.as_path())
+			.join(dir_file.file_name().unwrap())
+			.with_extension("json");
+
+		std::fs::create_dir_all(out.parent().unwrap())?;
+		tracing::Span::current().record("out", tracing::field::display(out.display()));
+		Box::new(std::fs::File::create(out)?)
+	};
+
+	let mut out = BufWriter::new(out);
+	let mut ser = serde_json::Serializer::with_formatter(&mut out, MyFormatter::new(1));
+	json.serialize(&mut ser)?;
+	out.write_all(b"\n")?;
+	out.flush()?;
+
+	tracing::info!("done");
+
+	Ok(())
+}
+
+fn index_file(m: &DirEntry, dir_file: &Path, dat: Option<&[u8]>) -> Value {
+	if m.name == Name::default() {
+		Value::Null
+	} else {
+		let mut o = serde_json::Map::new();
+
+		if m.timestamp == 0 {
+			o.insert("path".into(), Value::Null);
+			o.insert("name".into(), m.name.to_string().into());
+		} else {
+			o.insert("path".into(), format!("{}/{}", dir_file.file_stem().unwrap().to_string_lossy(), m.name).into());
+			let comp = dat.and_then(|a| a.get(m.offset..m.offset+m.size)).and_then(bzip::compression_info_ed6);
+			if let Some(comp) = comp {
+				match comp.1.unwrap_or_default() {
+					bzip::CompressMode::Mode1 => o.insert("compress".into(), 1u8.into()),
+					bzip::CompressMode::Mode2 => o.insert("compress".into(), 2u8.into()),
+				};
+			}
+		}
+
+		if m.reserved_size != m.size {
+			o.insert("reserve".into(), m.reserved_size.into());
+		}
+		if m.unk1 != 0 {
+			o.insert("unknown1".into(), m.unk1.into());
+		}
+		if m.unk2 != 0 {
+			o.insert("unknown2".into(), m.unk2.into());
+		}
+
+		// size, timestamp, and offset are all derived from the file
+
+		o.into()
+	}
+}
+
+struct MyFormatter {
+	level: usize,
+	indent_to: usize,
+	has_value: bool,
+}
+
+impl MyFormatter {
+	pub fn new(depth: usize) -> Self {
+		Self {
+			level: 0,
+			indent_to: depth,
+			has_value: false,
+		}
+	}
+}
+
+impl serde_json::ser::Formatter for MyFormatter {
+	#[inline]
+	fn begin_array<W: ?Sized + io::Write>(&mut self, writer: &mut W) -> io::Result<()> {
+		self.level += 1;
+		self.has_value = false;
+		writer.write_all(b"[")
+	}
+
+	#[inline]
+	fn end_array<W: ?Sized + io::Write>(&mut self, writer: &mut W) -> io::Result<()> {
+		if self.has_value {
+			indent(writer, self.level - 1, self.indent_to - 1)?;
+		}
+		self.level -= 1;
+		writer.write_all(b"]")
+	}
+
+	#[inline]
+	fn begin_array_value<W: ?Sized + io::Write>(&mut self, writer: &mut W, first: bool) -> io::Result<()> {
+		if !first {
+			writer.write_all(b",")?;
+		}
+		indent(writer, self.level, self.indent_to)?;
+		Ok(())
+	}
+
+	#[inline]
+	fn end_array_value<W: ?Sized + io::Write>(&mut self, _writer: &mut W) -> io::Result<()> {
+		self.has_value = true;
+		Ok(())
+	}
+
+	#[inline]
+	fn begin_object<W: ?Sized + io::Write>(&mut self, writer: &mut W) -> io::Result<()> {
+		self.level += 1;
+		self.has_value = false;
+		writer.write_all(b"{")
+	}
+
+	#[inline]
+	fn end_object<W: ?Sized + io::Write>(&mut self, writer: &mut W) -> io::Result<()> {
+		if self.has_value {
+			indent(writer, self.level - 1, self.indent_to - 1)?;
+		}
+		self.level -= 1;
+		writer.write_all(b"}")
+	}
+
+	#[inline]
+	fn begin_object_key<W: ?Sized + io::Write>(&mut self, writer: &mut W, first: bool) -> io::Result<()> {
+		if !first {
+			writer.write_all(b",")?;
+		}
+		indent(writer, self.level, self.indent_to)?;
+		Ok(())
+	}
+
+	#[inline]
+	fn begin_object_value<W: ?Sized + io::Write>(&mut self, writer: &mut W) -> io::Result<()> {
+		writer.write_all(b": ")
+	}
+
+	#[inline]
+	fn end_object_value<W: ?Sized + io::Write>(&mut self, _writer: &mut W) -> io::Result<()> {
+		self.has_value = true;
+		Ok(())
+	}
+}
+
+fn indent<W: ?Sized + io::Write>(wr: &mut W, n: usize, m: usize) -> io::Result<()> {
+	if n <= m {
+		wr.write_all(b"\n")?;
+		for _ in 0..n {
+			wr.write_all(b"\t")?;
+		}
+	} else {
+		wr.write_all(b" ")?;
+	}
+	Ok(())
+}
diff --git a/factoria/src/list.rs b/factoria/src/list.rs
index 097e3a2..aa06c96 100644
--- a/factoria/src/list.rs
+++ b/factoria/src/list.rs
@@ -346,7 +346,7 @@ fn get_entries(cmd: &Command, dir_file: &Path) -> eyre::Result<Vec<DirEntry>> {
 	Ok(entries)
 }
 
-fn get_archive_number(path: &Path) -> Option<u16> {
+pub(crate) fn get_archive_number(path: &Path) -> Option<u16> {
 	let name = path
 		.file_name()?
 		.to_str()?
diff --git a/factoria/src/main.rs b/factoria/src/main.rs
index d6eb849..0d32b95 100644
--- a/factoria/src/main.rs
+++ b/factoria/src/main.rs
@@ -11,6 +11,8 @@ mod list;
 mod add;
 mod remove;
 mod rebuild;
+mod index;
+mod create;
 
 #[derive(Debug, Clone, Parser)]
 #[command(args_conflicts_with_subcommands = true, disable_help_subcommand = true)]
@@ -35,10 +37,10 @@ enum Command {
 	Remove(remove::Command),
 	/// Clear out unused data from archives
 	Rebuild(rebuild::Command),
-	/// Create a json index file for an archive (TBI)
-	Index,
-	/// Create an archive from a json index file (TBI)
-	Create,
+	/// Create a json index file for an archive
+	Index(index::Command),
+	/// Create an archive from a json index file
+	Create(create::Command),
 }
 
 fn main() -> eyre::Result<()> {
@@ -66,8 +68,8 @@ fn main() -> eyre::Result<()> {
 		Command::Add(cmd) => emit(add::run(&cmd)),
 		Command::Remove(cmd) => emit(remove::run(&cmd)),
 		Command::Rebuild(cmd) => emit(rebuild::run(&cmd)),
-		Command::Index => todo!(),
-		Command::Create => todo!(),
+		Command::Index(cmd) => emit(index::run(&cmd)),
+		Command::Create(cmd) => emit(create::run(&cmd)),
 	};
 	Ok(())
 }
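
Note: a sketch of the json index format these subcommands round-trip, inferred from `Entry` in create.rs and `index_file` in index.rs; the archive and file names below are illustrative, not taken from the diff. Running `factoria index ED6_DT01.dir` writes an `ED6_DT01.json` along the lines of

	{
		"0x00010000": "ED6_DT01/t0100._sn",
		"0x00010001": {
			"path": "ED6_DT01/t0101._sn",
			"compress": 2,
			"reserve": 20480
		},
		"0x00010002": null
	}

A bare string is shorthand for an object containing only `path` (via the `FromStr` impl for `Entry`); `null` marks an empty slot; `compress` must be 1, 2, or null/absent (absent means stored uncompressed); entries present in the .dir but without backing data are written as `{"path": null, "name": ...}`. Keys are hex file ids, of which `create` uses only the low four digits. `factoria create ED6_DT01.json` then rebuilds `ED6_DT01.dir`/`.dat` from the index plus the extracted files.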