From 5dc9449c6c9aee6ce17ee249f9fe01edd2f37cac Mon Sep 17 00:00:00 2001 From: Andrew Consroe Date: Thu, 15 Aug 2024 17:55:27 -0500 Subject: [PATCH] misc: Reduce allocations and streamline copies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add AmlSink::vec specialization for Vec so we can memcpy Convert calls like: `for byte in bytes { sink.byte(byte); }` into `sink.vec(bytes)` Remove all calls to Vec::insert by reordering appends or doing a single memmove and slice copy Evaluation done by timing the tests and tracking allocations with valgrind's dhat, both for heap tracking and memcopy tracking. after/before for total blocks allocated is 6588/8029 = 82% after/before fastest run time is 4.6/5.9 = 78% ``` set -e for x in main allocation_reduction; do git checkout $x echo "=== $x ===" cargo test --release --tests --no-run &>/dev/null exe=$(find target/release/deps -executable -name 'acpi_tables*') sha256sum ${exe} taskset -c 0 hyperfine --shell=none "${exe} --test-threads 1" valgrind --tool=dhat --mode=heap --dhat-out-file=/dev/null ${exe} \ --test-threads 1 > /dev/null valgrind --tool=dhat --mode=copy --dhat-out-file=/dev/null ${exe} \ --test-threads 1 > /dev/null done ``` And the output (with manual removal of extraneous output) ``` === main === Benchmark 1: target/release/deps/acpi_tables-44d55ae4eaed1824 --test-threads 1 Time (mean ± σ): 6.2 ms ± 0.1 ms [User: 2.0 ms, System: 2.9 ms] Range (min … max): 5.9 ms … 6.7 ms 474 runs DHAT --mode=heap Total: 2,452,242 bytes in 8,029 blocks At t-gmax: 75,063 bytes in 654 blocks At t-end: 0 bytes in 0 blocks Reads: 2,953,771 bytes Writes: 2,194,154 bytes DHAT --mode=copy Total: 488,009 bytes in 4,448 blocks === allocation_reduction === Benchmark 1: target/release/deps/acpi_tables-44d55ae4eaed1824 --test-threads 1 Time (mean ± σ): 5.0 ms ± 0.2 ms [User: 1.0 ms, System: 2.8 ms] Range (min … max): 4.6 ms … 5.5 ms 587 runs DHAT --mode=heap Total: 2,410,141 bytes in 6,588 blocks At t-gmax: 75,575 bytes in 654 blocks At t-end: 0 bytes in 0 blocks Reads: 2,920,895 bytes Writes: 2,160,911 bytes DHAT --mode=copy Total: 1,149,621 bytes in 25,894 blocks ``` Signed-off-by: Andrew Consroe --- src/aml.rs | 133 +++++++++++++++++++--------------------------------- src/cedt.rs | 4 +- src/fadt.rs | 4 +- src/lib.rs | 20 ++++---- src/madt.rs | 4 +- src/mcfg.rs | 4 +- src/pptt.rs | 4 +- src/rhct.rs | 8 +--- src/rimt.rs | 4 +- src/rqsc.rs | 4 +- src/slit.rs | 4 +- src/spcr.rs | 12 ++--- src/srat.rs | 12 ++--- src/tpm2.rs | 9 ++-- src/xsdt.rs | 4 +- 15 files changed, 75 insertions(+), 155 deletions(-) diff --git a/src/aml.rs b/src/aml.rs index fb4fb80..0b15ad4 100644 --- a/src/aml.rs +++ b/src/aml.rs @@ -141,9 +141,7 @@ impl Aml for Path { }; for part in self.name_parts.clone().iter_mut() { - for byte in part { - sink.byte(*byte); - } + sink.vec(part); } } } @@ -233,9 +231,7 @@ pub struct Name { impl Aml for Name { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in &self.bytes { - sink.byte(*byte); - } + sink.vec(&self.bytes); } } @@ -273,13 +269,10 @@ impl<'a> Aml for Package<'a> { child.to_aml_bytes(&mut bytes); } - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(PACKAGEOP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -312,6 +305,10 @@ impl AmlSink for PackageBuilder { fn byte(&mut self, byte: u8) { self.data.push(byte); } + + fn vec(&mut self, v: &[u8]) { + self.data.extend_from_slice(v); + } } impl PackageBuilder { @@ -345,13 +342,10 @@ impl<'a> Aml for VarPackageTerm<'a> { let mut bytes = Vec::new(); self.data.to_aml_bytes(&mut bytes); - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(VARPACKAGEOP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -380,7 +374,7 @@ package length is 2**28." /* Also used for NamedField but in that case the length is not included in itself */ fn create_pkg_length(len: usize, include_self: bool) -> Vec { - let mut result = Vec::new(); + let mut result = Vec::with_capacity(4); /* PkgLength is inclusive and includes the length bytes */ let length_length = if len < (2usize.pow(6) - 1) { @@ -466,9 +460,7 @@ impl Aml for Usize { fn create_aml_string(v: &str, sink: &mut dyn AmlSink) { sink.byte(STRINGOP); - for byte in v.as_bytes() { - sink.byte(*byte); - } + sink.vec(v.as_bytes()); sink.byte(0x0); /* NullChar */ } @@ -510,21 +502,15 @@ impl<'a> Aml for ResourceTemplate<'a> { // Buffer length is an encoded integer including buffer data // and EndTag and checksum byte - let mut buffer_length = Vec::new(); + let mut buffer_length = Vec::with_capacity(4); bytes.len().to_aml_bytes(&mut buffer_length); - buffer_length.reverse(); - for byte in buffer_length { - bytes.insert(0, byte); - } // PkgLength is everything else - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len() + buffer_length.len(), true); sink.byte(BUFFEROP); + sink.vec(&pkg_length); + sink.vec(&buffer_length); sink.vec(&bytes); } } @@ -806,14 +792,11 @@ impl<'a> Aml for Device<'a> { child.to_aml_bytes(&mut bytes); } - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(EXTOPPREFIX); /* ExtOpPrefix */ sink.byte(DEVICEOP); /* DeviceOp */ + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -839,13 +822,10 @@ impl<'a> Aml for Scope<'a> { child.to_aml_bytes(&mut bytes); } - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(SCOPEOP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -859,14 +839,19 @@ impl<'a> Scope<'a> { /// Create raw bytes representing a Scope from its children in raw bytes pub fn raw(path: Path, mut children: Vec) -> Vec { let mut bytes = Vec::new(); + bytes.push(SCOPEOP); path.to_aml_bytes(&mut bytes); bytes.append(&mut children); - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } - bytes.insert(0, SCOPEOP); + + let n = bytes.len(); // n >= 1 + let pkg_length = create_pkg_length(n - 1, true); + let m = pkg_length.len(); + + // move everything after the SCOPEOP over and copy in pkg_length + bytes.resize(n + m, 0xFF); + bytes.as_mut_slice().copy_within(1..n, m + 1); + bytes.as_mut_slice()[1..m + 1].copy_from_slice(pkg_length.as_slice()); + bytes } } @@ -901,13 +886,10 @@ impl<'a> Aml for Method<'a> { child.to_aml_bytes(&mut bytes); } - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(METHODOP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -996,14 +978,11 @@ impl Aml for Field { } } - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(EXTOPPREFIX); sink.byte(FIELDOP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -1078,13 +1057,10 @@ impl<'a> Aml for If<'a> { child.to_aml_bytes(&mut bytes); } - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(IFOP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -1108,13 +1084,10 @@ impl<'a> Aml for Else<'a> { child.to_aml_bytes(&mut bytes); } - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(ELSEOP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -1310,13 +1283,10 @@ impl<'a> Aml for While<'a> { child.to_aml_bytes(&mut bytes); } - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(WHILEOP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -1530,13 +1500,10 @@ impl<'a> Aml for BufferTerm<'a> { let mut bytes = Vec::new(); self.data.to_aml_bytes(&mut bytes); - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(BUFFEROP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -1559,13 +1526,10 @@ impl Aml for BufferData { self.data.len().to_aml_bytes(&mut bytes); bytes.extend_from_slice(&self.data); - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(BUFFEROP); + sink.vec(&pkg_length); sink.vec(&bytes); } } @@ -1683,14 +1647,11 @@ impl<'a> Aml for PowerResource<'a> { } // PkgLength - let mut pkg_length = create_pkg_length(bytes.len(), true); - pkg_length.reverse(); - for byte in pkg_length { - bytes.insert(0, byte); - } + let pkg_length = create_pkg_length(bytes.len(), true); sink.byte(POWERRESOURCEOP); sink.byte(EXTOPPREFIX); + sink.vec(&pkg_length); sink.vec(&bytes); } } diff --git a/src/cedt.rs b/src/cedt.rs index 6c9b25f..cb5561a 100644 --- a/src/cedt.rs +++ b/src/cedt.rs @@ -302,9 +302,7 @@ impl Aml for CxlFixedMemory { sink.word(self.window_restrictions); sink.word(self.qtg_id); for target in &self.interleave_targets { - for byte in target { - sink.byte(*byte); - } + sink.vec(target); } } } diff --git a/src/fadt.rs b/src/fadt.rs index d78a31e..da240cc 100644 --- a/src/fadt.rs +++ b/src/fadt.rs @@ -147,9 +147,7 @@ impl FADT { impl Aml for FADT { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.table.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.table.as_bytes()); } } diff --git a/src/lib.rs b/src/lib.rs index 91722e1..55ac2cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,21 +49,15 @@ pub trait AmlSink { fn byte(&mut self, byte: u8); fn word(&mut self, word: u16) { - for byte in word.to_le_bytes() { - self.byte(byte); - } + self.vec(&word.to_le_bytes()); } fn dword(&mut self, dword: u32) { - for byte in dword.to_le_bytes() { - self.byte(byte); - } + self.vec(&dword.to_le_bytes()); } fn qword(&mut self, qword: u64) { - for byte in qword.to_le_bytes() { - self.byte(byte); - } + self.vec(&qword.to_le_bytes()); } fn vec(&mut self, v: &[u8]) { @@ -87,6 +81,10 @@ impl AmlSink for alloc::vec::Vec { fn byte(&mut self, byte: u8) { self.push(byte); } + + fn vec(&mut self, v: &[u8]) { + self.extend_from_slice(v); + } } /// Standard header for many ACPI tables @@ -187,9 +185,7 @@ macro_rules! aml_as_bytes { ($x:ty) => { impl Aml for $x { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.as_bytes()); } } }; diff --git a/src/madt.rs b/src/madt.rs index a24eec4..b0c05af 100644 --- a/src/madt.rs +++ b/src/madt.rs @@ -126,9 +126,7 @@ impl MADT { impl Aml for MADT { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); for st in &self.structures { st.to_aml_bytes(sink); diff --git a/src/mcfg.rs b/src/mcfg.rs index 7425134..a8e9ac4 100644 --- a/src/mcfg.rs +++ b/src/mcfg.rs @@ -73,9 +73,7 @@ impl MCFG { impl Aml for MCFG { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); // 8 reserved bytes sink.qword(0); diff --git a/src/pptt.rs b/src/pptt.rs index b9b17c1..f84cd42 100644 --- a/src/pptt.rs +++ b/src/pptt.rs @@ -77,9 +77,7 @@ impl PPTT { impl Aml for PPTT { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); for st in &self.structures { st.to_aml_bytes(sink); diff --git a/src/rhct.rs b/src/rhct.rs index 507f575..7401a9d 100644 --- a/src/rhct.rs +++ b/src/rhct.rs @@ -135,9 +135,7 @@ impl RHCT { impl Aml for RHCT { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); for st in &self.structures { st.to_aml_bytes(sink); @@ -190,9 +188,7 @@ impl Aml for IsaStringNode { sink.word(self.len() as u16); sink.word(Self::REVISION); sink.word(strlen); - for byte in self.string.bytes() { - sink.byte(byte); - } + sink.vec(self.string.as_bytes()); sink.byte(0); // NULL terminator if padding_reqd { sink.byte(0); diff --git a/src/rimt.rs b/src/rimt.rs index edeb92f..f41eaad 100644 --- a/src/rimt.rs +++ b/src/rimt.rs @@ -99,9 +99,7 @@ impl RIMT { /// topology and the IOMMU. impl Aml for RIMT { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); sink.dword(self.devices.len() as u32); sink.dword(Self::DEVICE_OFFSET); diff --git a/src/rqsc.rs b/src/rqsc.rs index 54f1560..65710da 100644 --- a/src/rqsc.rs +++ b/src/rqsc.rs @@ -80,9 +80,7 @@ impl RQSC { impl Aml for RQSC { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); sink.dword(self.structures.len() as u32); for st in &self.structures { diff --git a/src/slit.rs b/src/slit.rs index c934258..a70e9c7 100644 --- a/src/slit.rs +++ b/src/slit.rs @@ -79,9 +79,7 @@ impl SLIT { impl Aml for SLIT { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); sink.qword(self.localities as u64); for entry in &self.entries { diff --git a/src/spcr.rs b/src/spcr.rs index ae6d1cd..7bd2414 100644 --- a/src/spcr.rs +++ b/src/spcr.rs @@ -53,15 +53,9 @@ impl SPCR<'_> { impl Aml for SPCR<'_> { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } - for byte in self.info.as_bytes() { - sink.byte(*byte); - } - for byte in self.namespace_string { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); + sink.vec(self.info.as_bytes()); + sink.vec(self.namespace_string); } } diff --git a/src/srat.rs b/src/srat.rs index 9bf681e..d8234ae 100644 --- a/src/srat.rs +++ b/src/srat.rs @@ -81,9 +81,7 @@ impl SRAT { impl Aml for SRAT { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); sink.dword(1); // reserved to be 1 for backward compatibility. sink.qword(0); // reserved @@ -206,12 +204,8 @@ impl Aml for Handle { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { match self { Handle::Acpi { hid, uid } => { - for byte in hid { - sink.byte(*byte); - } - for byte in uid { - sink.byte(*byte); - } + sink.vec(hid); + sink.vec(uid); sink.dword(0); // reserved } diff --git a/src/tpm2.rs b/src/tpm2.rs index 51f36a4..4fe232a 100644 --- a/src/tpm2.rs +++ b/src/tpm2.rs @@ -192,9 +192,7 @@ crate::assert_same_size!(TpmServer1_2, [u8; 100]); impl Aml for TpmServer1_2 { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.as_bytes()); } } @@ -290,9 +288,8 @@ impl Aml for Tpm2 { sink.word(0); // reserved sink.qword(self.crb_or_fifo_base); sink.dword(self.start_method as u32); - for byte in &self.start_method_params[0..self.start_method_param_len] { - sink.byte(*byte); - } + sink.vec(&self.start_method_params[0..self.start_method_param_len]); + if let Some(laml) = self.log_area_min_len { sink.dword(laml); } diff --git a/src/xsdt.rs b/src/xsdt.rs index 6c7a373..e972ba6 100644 --- a/src/xsdt.rs +++ b/src/xsdt.rs @@ -60,9 +60,7 @@ impl XSDT { impl Aml for XSDT { fn to_aml_bytes(&self, sink: &mut dyn AmlSink) { - for byte in self.header.as_bytes() { - sink.byte(*byte); - } + sink.vec(self.header.as_bytes()); for entry in &self.entries { sink.qword(*entry);