From c45d3d9777f37953d93bba1048ea17f5437da1ba Mon Sep 17 00:00:00 2001 From: Nick Babcock Date: Sat, 11 Nov 2023 09:13:28 -0600 Subject: [PATCH] Allow direct struct field matching from binary token The binary token resolution leaves some efficiency on the table as it first translates the token to a string and then matches the string against struct fields. The PR asks the question, what if we reduce the operation to just matching on the 16bit value? Remove the translation step and simplify matching. This is what I envision and technically now works in this PR: ```rust struct MyStruct { #[jomini(token = 0x2d82)] field1: String, } ``` Large caveats: - Direct matching is only done when the string resolution fails - And must use `FailedResolveStrategy::Visit` The first caveat is the largest problem as we don't want to remove tokens from the token list because token resolution needs to work on values (they aren't always keys). I think the best course of action will be to classify each struct as if fields can be directly resolved or if it needs translation based on the presence of the `#[jomini(token)]` attribute. For direct structs, swap the request to `deserialize_identifier` to `deserialize_u16` to give a hint that tokens should not be resolved. Some unanswered questions: - Do the efficiency gains justify the complication? - I know PDS doesn't want the token list revealed, I wonder if this is obfuscated enough to be ok. --- README.md | 28 ++++++++ jomini_derive/src/lib.rs | 70 ++++++++++++++++++- jomini_derive/tests/compile-fail/tokens.rs | 13 ++++ .../tests/compile-fail/tokens.stderr | 7 ++ jomini_derive/tests/compiletest.rs | 1 + src/binary/de.rs | 53 +++++++++++++- src/lib.rs | 55 +++++++++++++++ tests/de.rs | 36 +++++++++- 8 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 jomini_derive/tests/compile-fail/tokens.rs create mode 100644 jomini_derive/tests/compile-fail/tokens.stderr diff --git a/README.md b/README.md index 833500c..c6b1652 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,34 @@ let actual: MyStruct = OndemandBinaryDeserializer::builder_flavor(BinaryTestFlav assert_eq!(actual, MyStruct { field1: "ENG".to_string() }); ``` +### Direct identifier deserialization with `token` attribute + +There may be some performance loss during binary deserialization as +tokens are resolved to strings via a `TokenResolver` and then matched against the +string representations of a struct's fields. + +We can fix this issue by directly encoding the expected token value into the struct: + +```rust +#[derive(JominiDeserialize, PartialEq, Debug)] +struct MyStruct { + #[jomini(token = 0x2d82)] + field1: String, +} + +// Empty token to string resolver +let map = HashMap::::new(); + +let actual: MyStruct = BinaryDeserializer::builder_flavor(BinaryTestFlavor) + .deserialize_slice(&data[..], &map)?; +assert_eq!(actual, MyStruct { field1: "ENG".to_string() }); +``` + +Couple notes: + +- This does not obviate need for the token to string resolver as tokens may be used as values. +- If the `token` attribute is specified on one field on a struct, it must be specified on all fields of that struct. + ## Caveats Caller is responsible for: diff --git a/jomini_derive/src/lib.rs b/jomini_derive/src/lib.rs index c9ea3e5..24cc276 100644 --- a/jomini_derive/src/lib.rs +++ b/jomini_derive/src/lib.rs @@ -138,6 +138,32 @@ fn alias(f: &Field) -> Option { .next() } +fn binary_token(f: &Field) -> Option { + f.attrs + .iter() + .filter(|attr| attr.path.is_ident("jomini")) + .map(|attr| attr.parse_meta().unwrap()) + .filter_map(|meta| match meta { + Meta::List(x) => Some(x), + _ => None, + }) + .flat_map(|x| x.nested) + .filter_map(|x| match x { + NestedMeta::Meta(m) => Some(m), + _ => None, + }) + .filter(|m| m.path().is_ident("token")) + .filter_map(|meta| match meta { + Meta::NameValue(mnv) => Some(mnv), + _ => None, + }) + .filter_map(|mnv| match mnv.lit { + Lit::Int(s) => s.base10_parse::().ok(), + _ => None, + }) + .next() +} + fn ungroup(mut ty: &Type) -> &Type { while let Type::Group(group) = ty { ty = &group.elem; @@ -369,6 +395,36 @@ pub fn derive(input: TokenStream) -> TokenStream { } }); + let field_enum_token_match = named_fields.named.iter().filter_map(|f| { + let name = &f.ident; + if let Some(match_arm) = binary_token(f) { + let field_ident = quote! { __Field::#name }; + Some(quote! { + #match_arm => Ok(#field_ident), + }) + } else { + None + } + }); + + let token_count = named_fields.named.iter().filter_map(binary_token).count(); + if token_count > 0 && token_count < named_fields.named.len() { + panic!( + "{} does not have #[jomini(token = x)] defined for all fields", + struct_ident + ) + } + + let deser_request = if token_count > 0 { + quote! { + ::serde::Deserializer::deserialize_u16(__deserializer, __FieldVisitor) + } + } else { + quote! { + ::serde::Deserializer::deserialize_identifier(__deserializer, __FieldVisitor) + } + }; + let expecting = format!("struct {}", struct_ident); let struct_ident_str = struct_ident.to_string(); @@ -415,6 +471,18 @@ pub fn derive(input: TokenStream) -> TokenStream { _ => Ok(__Field::__ignore), } } + fn visit_u16<__E>( + self, + __value: u16, + ) -> ::std::result::Result + where + __E: ::serde::de::Error, + { + match __value { + #(#field_enum_token_match)* + _ => Ok(__Field::__ignore), + } + } } impl<'de> serde::Deserialize<'de> for __Field { @@ -425,7 +493,7 @@ pub fn derive(input: TokenStream) -> TokenStream { where __D: ::serde::Deserializer<'de>, { - ::serde::Deserializer::deserialize_identifier(__deserializer, __FieldVisitor) + #deser_request } } diff --git a/jomini_derive/tests/compile-fail/tokens.rs b/jomini_derive/tests/compile-fail/tokens.rs new file mode 100644 index 0000000..2f4443c --- /dev/null +++ b/jomini_derive/tests/compile-fail/tokens.rs @@ -0,0 +1,13 @@ +#[allow(dead_code)] +use jomini_derive::JominiDeserialize; + +#[derive(JominiDeserialize)] +pub struct Model { + #[jomini(token = 0x10)] + human: bool, + #[jomini(token = 0x11)] + checksum: String, + fourth: u16, +} + +fn main() {} diff --git a/jomini_derive/tests/compile-fail/tokens.stderr b/jomini_derive/tests/compile-fail/tokens.stderr new file mode 100644 index 0000000..8348e8d --- /dev/null +++ b/jomini_derive/tests/compile-fail/tokens.stderr @@ -0,0 +1,7 @@ +error: proc-macro derive panicked + --> tests/compile-fail/tokens.rs:4:10 + | +4 | #[derive(JominiDeserialize)] + | ^^^^^^^^^^^^^^^^^ + | + = help: message: Model does not have #[jomini(token = x)] defined for all fields diff --git a/jomini_derive/tests/compiletest.rs b/jomini_derive/tests/compiletest.rs index 9faa9c0..334fa02 100644 --- a/jomini_derive/tests/compiletest.rs +++ b/jomini_derive/tests/compiletest.rs @@ -2,4 +2,5 @@ fn tests() { let t = trybuild::TestCases::new(); t.pass("tests/01-parse.rs"); + t.compile_fail("tests/compile-fail/*.rs"); } diff --git a/src/binary/de.rs b/src/binary/de.rs index 865fede..315e1f6 100644 --- a/src/binary/de.rs +++ b/src/binary/de.rs @@ -465,7 +465,6 @@ impl<'a, 'de: 'a, 'res: 'de, RES: TokenResolver, F: BinaryFlavor> de::Deserializ deserialize_scalar!(deserialize_i8); deserialize_scalar!(deserialize_i16); deserialize_scalar!(deserialize_u8); - deserialize_scalar!(deserialize_u16); deserialize_scalar!(deserialize_char); deserialize_scalar!(deserialize_identifier); deserialize_scalar!(deserialize_bytes); @@ -482,6 +481,17 @@ impl<'a, 'de: 'a, 'res: 'de, RES: TokenResolver, F: BinaryFlavor> de::Deserializ } } + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.token { + QUOTED_STRING | UNQUOTED_STRING | U32 | I32 | U64 | I64 | BOOL | F32 | F64 | OPEN + | END | EQUAL => self.deser(visitor), + x => visitor.visit_u16(x), + } + } + fn deserialize_i32(self, visitor: V) -> Result where V: Visitor<'de>, @@ -1168,6 +1178,17 @@ impl<'b, 'de, 'res: 'de, RES: TokenResolver, E: BinaryFlavor> de::Deserializer<' { type Error = Error; + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if let BinaryToken::Token(x) = self.tokens[self.tape_idx] { + visitor.visit_u16(x) + } else { + visit_key(self.tape_idx, self.tokens, self.config, visitor) + } + } + fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, @@ -1176,7 +1197,7 @@ impl<'b, 'de, 'res: 'de, RES: TokenResolver, E: BinaryFlavor> de::Deserializer<' } serde::forward_to_deserialize_any! { - bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bool i8 i16 i32 i64 i128 u8 u32 u64 u128 f32 f64 char str string bytes byte_buf option unit unit_struct newtype_struct seq tuple tuple_struct map enum ignored_any identifier struct } @@ -1818,6 +1839,34 @@ mod tests { ); } + #[test] + fn test_token_visit() { + let data = [ + 0x82, 0x2d, 0x01, 0x00, 0x17, 0x00, 0x03, 0x00, 0x45, 0x4e, 0x47, + ]; + + #[derive(JominiDeserialize, PartialEq, Debug)] + struct MyStruct { + #[jomini(token = 0x2d82)] + field1: String, + } + + struct NullResolver; + impl TokenResolver for NullResolver { + fn resolve(&self, _token: u16) -> Option<&str> { + None + } + } + + let actual: MyStruct = from_slice(&data[..], &NullResolver).unwrap(); + assert_eq!( + actual, + MyStruct { + field1: String::from("ENG"), + } + ); + } + #[test] fn test_multiple_top_level_events() { let data = [ diff --git a/src/lib.rs b/src/lib.rs index d7f3d05..9e81656 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -181,6 +181,61 @@ assert_eq!(actual, MyStruct { field1: "ENG".to_string() }); # Ok::<(), Box>(()) ``` +### Direct identifier deserialization with `token` attribute + +There may be some performance loss during binary deserialization as +tokens are resolved to strings via a `TokenResolver` and then matched against the +string representations of a struct's fields. + +We can fix this issue by directly encoding the expected token value into the struct: + +```rust +# #[cfg(feature = "derive")] { +# use jomini::{Encoding, JominiDeserialize, Windows1252Encoding, BinaryDeserializer}; +# use std::{borrow::Cow, collections::HashMap}; +# +# #[derive(Debug, Default)] +# pub struct BinaryTestFlavor; +# +# impl jomini::binary::BinaryFlavor for BinaryTestFlavor { +# fn visit_f32(&self, data: [u8; 4]) -> f32 { +# f32::from_le_bytes(data) +# } +# +# fn visit_f64(&self, data: [u8; 8]) -> f64 { +# f64::from_le_bytes(data) +# } +# } +# +# impl Encoding for BinaryTestFlavor { +# fn decode<'a>(&self, data: &'a [u8]) -> Cow<'a, str> { +# Windows1252Encoding::decode(data) +# } +# } +# +# let data = [ 0x82, 0x2d, 0x01, 0x00, 0x0f, 0x00, 0x03, 0x00, 0x45, 0x4e, 0x47 ]; +# +#[derive(JominiDeserialize, PartialEq, Debug)] +struct MyStruct { + #[jomini(token = 0x2d82)] + field1: String, +} + +// Empty token to string resolver +let map = HashMap::::new(); + +let actual: MyStruct = BinaryDeserializer::builder_flavor(BinaryTestFlavor) + .deserialize_slice(&data[..], &map)?; +assert_eq!(actual, MyStruct { field1: "ENG".to_string() }); +# } +# Ok::<(), Box>(()) +``` + +Couple notes: + +- This does not obviate need for the token to string resolver as tokens may be used as values. +- If the `token` attribute is specified on one field on a struct, it must be specified on all fields of that struct. + ## Caveats Caller is responsible for: diff --git a/tests/de.rs b/tests/de.rs index 0f9e404..df3edb4 100644 --- a/tests/de.rs +++ b/tests/de.rs @@ -1,7 +1,8 @@ #![cfg(feature = "derive")] use jomini::{ - binary::BinaryFlavor, common::PdsDate, BinaryDeserializer, Encoding, Windows1252Encoding, + binary::BinaryFlavor, common::PdsDate, BinaryDeserializer, Encoding, JominiDeserialize, + Windows1252Encoding, }; use serde::{ de::{self, Visitor}, @@ -285,6 +286,39 @@ fn test_binary_meta_deserialization() { assert_eq!(actual.savegame_version.0, String::from("1.29.4.0")); } +#[test] +fn test_token_attribute_deserialization() { + #[derive(JominiDeserialize, Debug, Clone, PartialEq)] + struct Meta { + #[jomini(token = 0x284d)] + date: jomini::common::Date, + #[jomini(token = 0x2a38)] + player: String, + } + + let data = include_bytes!("./fixtures/meta.bin"); + let data = &data["EU4bin".len()..]; + let mut hash = create_bin_lookup(); + hash.remove(&0x284d); + hash.remove(&0x2a38); + let actual: Meta = BinaryDeserializer::builder_flavor(BinaryTestFlavor) + .deserialize_slice(&data, &hash) + .unwrap(); + assert_eq!( + actual.date.game_fmt().to_string(), + String::from("1597.1.15") + ); + assert_eq!(&actual.player, "RAG"); + + let data = include_bytes!("./fixtures/meta.txt"); + let actual: Meta = jomini::text::de::from_windows1252_slice(&data["EU4txt".len()..]).unwrap(); + assert_eq!( + actual.date.game_fmt().to_string(), + String::from("1444.11.11") + ); + assert_eq!(&actual.player, "ENG"); +} + #[test] fn test_binary_meta_deserialization_boxed() { let data = include_bytes!("./fixtures/meta.bin");