5 changes: 2 additions & 3 deletions ofrak_core/ofrak/core/binary.py
@@ -41,9 +41,8 @@ class BinaryExtendModifier(Modifier[BinaryExtendConfig]):
     async def modify(self, resource: Resource, config: BinaryExtendConfig):
         if len(config.content) == 0:
             raise ValueError("Content of the extended space not provided")
-        data = await resource.get_data()
-        data += config.content
-        resource.queue_patch(Range(0, await resource.get_data_length()), data)
+        orig_data_length = await resource.get_data_length()
+        resource.queue_patch(Range(orig_data_length, orig_data_length), config.content)
 
 
 @dataclass
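Note: the rewritten modifier appends by patching the empty range at the end of the resource, so the existing bytes are never pulled into Python and concatenated. A minimal sketch of the same pattern, using the `Resource` API as it appears in this diff (the `Range` import path is assumed):

```python
from ofrak_type.range import Range  # import path assumed

async def append_bytes(resource, content: bytes) -> None:
    # Patching the zero-length range [end, end) inserts `content` at the
    # end of the resource without copying or rewriting the existing data.
    end = await resource.get_data_length()
    resource.queue_patch(Range(end, end), content)
```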
7 changes: 4 additions & 3 deletions ofrak_core/ofrak/core/bzip2.py
@@ -36,8 +36,8 @@ async def unpack(self, resource: Resource, config=None):
         :param resource:
         :param config:
         """
-        resource_data = await resource.get_data()
-        decompressed_data = bz2.decompress(resource_data)
+        with await resource.get_data_memoryview() as resource_data:
+            decompressed_data = bz2.decompress(resource_data)
         await resource.create_child(
             tags=(GenericBinary,),
             data=decompressed_data,
@@ -59,7 +59,8 @@ async def pack(self, resource: Resource, config=None):
         :param config:
         """
         bzip2_child = await resource.get_only_child()
-        bzip2_compressed = bz2.compress(await bzip2_child.get_data())
+        with await bzip2_child.get_data_memoryview() as buffer:
+            bzip2_compressed = bz2.compress(buffer)
         original_size = await resource.get_data_length()
         resource.queue_patch(Range(0, original_size), bzip2_compressed)
 
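Note: `bz2.compress` and `bz2.decompress` accept any object implementing the buffer protocol, which is why the memoryview can be handed to them directly. A quick self-contained check:

```python
import bz2

payload = b"OFRAK" * 1024
compressed = bz2.compress(payload)
# bz2.decompress accepts bytes-like objects, so a memoryview passes
# straight through without an intermediate bytes copy.
assert bz2.decompress(memoryview(compressed)) == payload
```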
8 changes: 4 additions & 4 deletions ofrak_core/ofrak/core/checksum.py
@@ -23,9 +23,9 @@ class Sha256Analyzer(Analyzer[None, Sha256Attributes]):
     outputs = (Sha256Attributes,)
 
     async def analyze(self, resource: Resource, config=None) -> Sha256Attributes:
-        data = await resource.get_data()
         sha256 = hashlib.sha256()
-        sha256.update(data)
+        with await resource.get_data_memoryview() as data:
+            sha256.update(data)
         return Sha256Attributes(sha256.hexdigest())
 
 
@@ -43,7 +43,7 @@ class Md5Analyzer(Analyzer[None, Md5Attributes]):
     outputs = (Md5Attributes,)
 
     async def analyze(self, resource: Resource, config=None) -> Md5Attributes:
-        data = await resource.get_data()
         md5 = hashlib.md5()
-        md5.update(data)
+        with await resource.get_data_memoryview() as data:
+            md5.update(data)
         return Md5Attributes(md5.hexdigest())
2 changes: 1 addition & 1 deletion ofrak_core/ofrak/core/comments.py
@@ -34,7 +34,7 @@ async def modify(self, resource: Resource, config: AddCommentModifierConfig) ->
         # Verify that the given range is valid for the given resource.
         config_range = config.comment[0]
         if config_range is not None:
-            if config_range.start < 0 or config_range.end > len(await resource.get_data()):
+            if config_range.start < 0 or config_range.end > await resource.get_data_length():
                 raise ValueError(
                     f"Range {config_range} is outside the bounds of "
                     f"resource {resource.get_id().hex()}"
4 changes: 2 additions & 2 deletions ofrak_core/ofrak/core/cpio.py
@@ -86,7 +86,6 @@ class CpioUnpacker(Unpacker[None]):
 
     async def unpack(self, resource: Resource, config=None):
         cpio_v = await resource.view_as(CpioFilesystem)
-        resource_data = await cpio_v.resource.get_data()
         with tempfile.TemporaryDirectory() as temp_flush_dir:
             cmd = [
                 "cpio",
@@ -99,7 +98,8 @@ async def unpack(self, resource: Resource, config=None):
                 stderr=asyncio.subprocess.PIPE,
                 cwd=temp_flush_dir,
             )
-            await proc.communicate(input=resource_data)
+            with await resource.get_data_memoryview() as resource_data:
+                await proc.communicate(input=resource_data)
             # if proc.returncode:
             #     raise CalledProcessError(returncode=proc.returncode, cmd=cmd)
             await cpio_v.initialize_from_disk(temp_flush_dir)
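Note: asyncio pipe transports also accept bytes-like input, so the memoryview can be fed to the subprocess as-is. A standalone sketch of the same piping pattern, using `cat` as a stand-in for `cpio`:

```python
import asyncio

async def pipe_through(payload) -> bytes:
    # communicate() writes the bytes-like input to the child's stdin;
    # a memoryview works because pipe transports accept buffer-protocol
    # objects (bytes, bytearray, memoryview).
    proc = await asyncio.create_subprocess_exec(
        "cat",
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
    )
    stdout, _ = await proc.communicate(input=memoryview(payload))
    return stdout

print(asyncio.run(pipe_through(b"hello")))
```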
8 changes: 4 additions & 4 deletions ofrak_core/ofrak/core/data.py
@@ -23,12 +23,12 @@ class DataWord(MemoryRegion):
     xrefs_to: Tuple[int, ...]
 
     async def get_value_unsigned(self) -> int:
-        data = await self.resource.get_data()
-        return struct.unpack(self.format_string.upper(), data)[0]
+        with await self.resource.get_data_memoryview() as data:
+            return struct.unpack(self.format_string.upper(), data)[0]
 
     async def get_value_signed(self) -> int:
-        data = await self.resource.get_data()
-        return struct.unpack(self.format_string.lower(), data)[0]
+        with await self.resource.get_data_memoryview() as data:
+            return struct.unpack(self.format_string.lower(), data)[0]
 
 
 @dataclass(**ResourceAttributes.DATACLASS_PARAMS)
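Note: `struct.unpack` reads from any buffer, so the word's value can be decoded straight out of the memoryview. A minimal standalone equivalent:

```python
import struct

word = memoryview(b"\x01\x00\x00\x00")
# struct.unpack accepts buffer-protocol objects; "<I" decodes an unsigned
# 32-bit little-endian word, "<i" its signed counterpart.
unsigned = struct.unpack("<I", word)[0]
signed = struct.unpack("<i", word)[0]
assert unsigned == 1 and signed == 1
```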
48 changes: 24 additions & 24 deletions ofrak_core/ofrak/core/dtb.py
@@ -102,30 +102,30 @@ class DtbHeaderAnalyzer(Analyzer[None, DtbHeader]):
     outputs = (DtbHeader,)
 
     async def analyze(self, resource: Resource, config: None) -> DtbHeader:
-        header_data = await resource.get_data()
-        (
-            dtb_magic,
-            totalsize,
-            off_dt_struct,
-            off_dt_strings,
-            off_mem_rsvmap,
-            version,
-            last_comp_version,
-        ) = struct.unpack(">IIIIIII", header_data[:28])
-        assert dtb_magic == DTB_MAGIC_SIGNATURE, (
-            f"DTB Magic bytes not matching."
-            f"Expected: {DTB_MAGIC_SIGNATURE} "
-            f"Unpacked: {dtb_magic}"
-        )
-        boot_cpuid_phys = 0
-        dtb_strings_size = 0
-        dtb_struct_size = 0
-        if version >= 2:
-            boot_cpuid_phys = struct.unpack(">I", header_data[28:32])[0]
-        if version >= 3:
-            dtb_strings_size = struct.unpack(">I", header_data[32:36])[0]
-        if version >= 17:
-            dtb_struct_size = struct.unpack(">I", header_data[36:40])[0]
+        with await resource.get_data_memoryview(Range(0, 40)) as header_data:
+            (
+                dtb_magic,
+                totalsize,
+                off_dt_struct,
+                off_dt_strings,
+                off_mem_rsvmap,
+                version,
+                last_comp_version,
+            ) = struct.unpack(">IIIIIII", header_data[:28])
+            assert dtb_magic == DTB_MAGIC_SIGNATURE, (
+                f"DTB Magic bytes not matching."
+                f"Expected: {DTB_MAGIC_SIGNATURE} "
+                f"Unpacked: {dtb_magic}"
+            )
+            boot_cpuid_phys = 0
+            dtb_strings_size = 0
+            dtb_struct_size = 0
+            if version >= 2:
+                boot_cpuid_phys = struct.unpack(">I", header_data[28:32])[0]
+            if version >= 3:
+                dtb_strings_size = struct.unpack(">I", header_data[32:36])[0]
+            if version >= 17:
+                dtb_struct_size = struct.unpack(">I", header_data[36:40])[0]
 
         return DtbHeader(
             dtb_magic,
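Note: the analyzer now maps only the first 40 bytes of the resource (`Range(0, 40)`), which covers every header field up through version 17 of the flattened devicetree format. A standalone sketch of the same fixed-offset parse (the magic constant comes from the devicetree spec; the helper and its return shape are hypothetical):

```python
import struct

DTB_MAGIC = 0xD00DFEED  # flattened-devicetree magic, per the spec

def parse_dtb_header(header) -> dict:
    # The first 28 bytes are seven big-endian u32 fields common to all
    # versions; later fields only exist at higher versions.
    magic, totalsize, off_struct, off_strings, off_rsvmap, version, last_comp = (
        struct.unpack_from(">IIIIIII", header, 0)
    )
    if magic != DTB_MAGIC:
        raise ValueError(f"bad DTB magic: {magic:#010x}")
    fields = {"totalsize": totalsize, "version": version}
    if version >= 2:
        fields["boot_cpuid_phys"] = struct.unpack_from(">I", header, 28)[0]
    if version >= 3:
        fields["size_dt_strings"] = struct.unpack_from(">I", header, 32)[0]
    if version >= 17:
        fields["size_dt_struct"] = struct.unpack_from(">I", header, 36)[0]
    return fields
```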
4 changes: 2 additions & 2 deletions ofrak_core/ofrak/core/elf/analyzer.py
@@ -50,8 +50,8 @@ class ElfBasicHeaderAttributesAnalyzer(Analyzer[None, ElfBasicHeader]):
     outputs = (ElfBasicHeader,)
 
     async def analyze(self, resource: Resource, config=None) -> ElfBasicHeader:
-        tmp = await resource.get_data()
-        deserializer = BinaryDeserializer(io.BytesIO(tmp))
+        with await resource.get_data_memoryview() as tmp:
+            deserializer = BinaryDeserializer(io.BytesIO(tmp))
         (
             ei_magic,
             ei_class,
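Note: `io.BytesIO` accepts any bytes-like initializer, so the deserializer can be seeded from the memoryview; `BytesIO` still makes its own copy of the buffer, so the saving here is the intermediate `bytes` object from `get_data()`. For example:

```python
import io

view = memoryview(b"\x7fELF\x02\x01\x01\x00")
# BytesIO copies from any buffer-protocol object, so a memoryview is a
# valid initializer; the resulting stream behaves as usual.
stream = io.BytesIO(view)
assert stream.read(4) == b"\x7fELF"
```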
2 changes: 1 addition & 1 deletion ofrak_core/ofrak/core/entropy/entropy.py
@@ -76,7 +76,7 @@ async def analyze(self, resource: Resource, config=None, depth=0) -> DataSummary
 
 
 def sample_entropy(
-    data: bytes, resource_id: bytes, window_size=256, max_samples=2**20
+    data: bytearray, resource_id: bytes, window_size=256, max_samples=2**20
 ) -> bytes: # pragma: no cover
     """
     Return a list of entropy values where each value represents the Shannon entropy of the byte
5 changes: 3 additions & 2 deletions ofrak_core/ofrak/core/entropy/entropy_c.py
@@ -33,7 +33,8 @@ def entropy_c(
    if len(data) <= window_size:
        return b""
    entropy = ctypes.create_string_buffer(len(data) - window_size)
-    errval = C_ENTROPY_FUNC(data, len(data), entropy, window_size, C_LOG_TYPE(log_percent))
+    buffer = (ctypes.c_char * len(data)).from_buffer_copy(data)
+    errval = C_ENTROPY_FUNC(buffer, len(data), entropy, window_size, C_LOG_TYPE(log_percent))
    if errval != 0:
        raise ValueError("Bad input to entropy function.")
-    return bytes(entropy.raw)
+    return entropy.raw
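Note: since the input may now be a `bytearray` rather than `bytes`, it can no longer be passed to the ctypes function directly; `from_buffer_copy` builds a C char array from any buffer-protocol object. In isolation:

```python
import ctypes

data = bytearray(b"\x01\x02\x03\x04")
# ctypes rejects a bytearray where a char* argument expects bytes, but
# from_buffer_copy accepts any buffer-protocol object and copies it into
# a C char array that can be passed to the foreign function.
buffer = (ctypes.c_char * len(data)).from_buffer_copy(data)
assert bytes(buffer) == b"\x01\x02\x03\x04"
```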
4 changes: 2 additions & 2 deletions ofrak_core/ofrak/core/entropy/entropy_py.py
@@ -25,7 +25,7 @@ def entropy_py(
         histogram[b] += 1
 
     # Calculate the entropy using a sliding window
-    entropy = [0] * (len(data) - window_size)
+    entropy = bytearray(max(0, len(data) - window_size))
     last_percent_logged = 0
     for i in range(len(entropy)):
         entropy[i] = math.floor(255 * _shannon_entropy(histogram, window_size))
@@ -35,7 +35,7 @@
         if percent > last_percent_logged and percent % 10 == 0:
             log_percent(percent)
             last_percent_logged = percent
-    return bytes(entropy)
+    return entropy
 
 
 def _shannon_entropy(distribution: List[int], window_size: int) -> float:
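Note: using a `bytearray` as the output both preallocates the result and drops the final `bytes()` copy, since each cell stores an int in `range(256)`. A compact standalone version of the same windowed-entropy idea (with a naive per-window histogram, unlike the file's incremental one):

```python
import math

def window_entropy(data: bytes, window_size: int) -> bytearray:
    # Each cell holds floor(255 * H), where H is the window's Shannon
    # entropy normalized to [0, 1] by the 8-bit maximum.
    out = bytearray(max(0, len(data) - window_size))
    for i in range(len(out)):
        histogram = [0] * 256
        for b in data[i : i + window_size]:
            histogram[b] += 1
        entropy = -sum(
            (c / window_size) * math.log2(c / window_size) for c in histogram if c
        )
        out[i] = math.floor(255 * (entropy / 8))
    return out
```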
2 changes: 1 addition & 1 deletion ofrak_core/ofrak/core/filesystem.py
@@ -214,7 +214,7 @@ async def flush_to_disk(self, root_path: str = ".", filename: Optional[str] = No
         elif self.is_file():
             file_name = os.path.join(root_path, entry_path)
             with open(file_name, "wb") as f:
-                f.write(await self.resource.get_data())
+                await self.resource.write_to(f, pack=False)
             self.apply_stat_attrs(file_name)
         elif self.is_device():
             device_name = os.path.join(root_path, entry_path)
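Note: `Resource.write_to` hands the open file object to the resource instead of round-tripping the full contents through `get_data()`. A sketch of the call as used above (an OFRAK API; the exact semantics of `pack=False` are assumed from its use here):

```python
async def flush_file(file_resource, path: str) -> None:
    # write_to sends the resource's bytes into the open file object;
    # pack=False (as used above) appears to write the current data
    # without re-packing the resource first (assumed semantics).
    with open(path, "wb") as f:
        await file_resource.write_to(f, pack=False)
```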
145 changes: 74 additions & 71 deletions ofrak_core/ofrak/core/flash.py
@@ -426,88 +426,91 @@ async def unpack(self, resource: Resource, config=None):
 
         oob_resource = resource
         # Parent FlashEccResource is created, redefine data to limited scope
-        data = await oob_resource.get_data()
-        data_len = len(data)
-
-        # Now add children blocks until we reach the tail block
-        offset = 0
-        only_data = list()
-        only_ecc = list()
-        for block in flash_attr.iterate_through_all_blocks(data_len, True):
-            block_size = flash_attr.get_block_size(block)
-            block_end_offset = offset + block_size
-            if block_end_offset > data_len:
-                LOGGER.info(
-                    f"Block offset {block_end_offset} is {block_end_offset - data_len} larger "
-                    f"than {data_len}. In this case unpacking is best effort and end of unpacked "
-                    f"child might not be accurate."
-                )
-                break
-            block_range = Range(offset, block_end_offset)
-            block_data = await oob_resource.get_data(range=block_range)
-
-            # Iterate through every field in block, dealing with ECC and DATA
-            block_ecc_range = None
-            block_data_range = None
-            field_offset = 0
-            for field_index, field in enumerate(block):
-                field_range = Range(field_offset, field_offset + field.size)
-
-                # We must check all blocks anyway so deal with ECC here
-                if field.field_type == FlashFieldType.ECC:
-                    block_ecc_range = field_range
-                    cur_block_ecc = block_data[block_ecc_range.start : block_ecc_range.end]
-                    only_ecc.append(cur_block_ecc)
-                    # Add hash of everything up to the ECC to our dict for faster packing
-                    block_data_hash = md5(block_data[: block_ecc_range.start]).digest()
-                    DATA_HASHES[block_data_hash] = cur_block_ecc
-
-                if field.field_type == FlashFieldType.DATA:
-                    block_data_range = field_range
-                    # Get next ECC range
-                    future_offset = field_offset
-                    block_list = list(block)
-                    for future_field in block_list[field_index:]:
-                        if future_field.field_type == FlashFieldType.ECC:
-                            block_ecc_range = Range(
-                                future_offset, future_offset + future_field.size
-                            )
-                        future_offset += future_field.size
-
-                if block_ecc_range is not None:
-                    # Try decoding/correcting with ECC, report any error
-                    try:
-                        # Assumes that data comes before ECC
-                        if (ecc_attr is not None) and (ecc_attr.ecc_class is not None):
-                            only_data.append(
-                                ecc_attr.ecc_class.decode(block_data[: block_ecc_range.end])[
-                                    block_data_range.start : block_data_range.end
-                                ]
-                            )
-                        else:
-                            raise UnpackerError(
-                                "Tried to correct with ECC without providing an ecc_class in FlashEccAttributes"
-                            )
-                    except EccError:
-                        raise UnpackerError("ECC correction failed")
-                else:
-                    # No ECC found in block, just add the data directly
-                    only_data.append(block_data[block_data_range.start : block_data_range.end])
-                field_offset += field.size
-            offset += block_size
-
+        with await oob_resource.get_data_memoryview() as data:
+            data_len = len(data)
+
+            # Now add children blocks until we reach the tail block
+            offset = 0
+            only_data = bytearray()
+            only_ecc = bytearray()
+            for block in flash_attr.iterate_through_all_blocks(data_len, True):
+                block_size = flash_attr.get_block_size(block)
+                block_end_offset = offset + block_size
+                if block_end_offset > data_len:
+                    LOGGER.info(
+                        f"Block offset {block_end_offset} is {block_end_offset - data_len} larger "
+                        f"than {data_len}. In this case unpacking is best effort and end of unpacked "
+                        f"child might not be accurate."
+                    )
+                    break
+                with data[offset:block_end_offset] as block_memview:
+                    block_data = bytes(block_memview)
+
+                # Iterate through every field in block, dealing with ECC and DATA
+                block_ecc_range = None
+                block_data_range = None
+                field_offset = 0
+                for field_index, field in enumerate(block):
+                    field_range = Range(field_offset, field_offset + field.size)
+
+                    # We must check all blocks anyway so deal with ECC here
+                    if field.field_type == FlashFieldType.ECC:
+                        block_ecc_range = field_range
+                        cur_block_ecc = block_data[block_ecc_range.start : block_ecc_range.end]
+                        only_ecc.extend(cur_block_ecc)
+                        # Add hash of everything up to the ECC to our dict for faster packing
+                        block_data_hash = md5(block_data[: block_ecc_range.start]).digest()
+                        DATA_HASHES[block_data_hash] = cur_block_ecc
+
+                    if field.field_type == FlashFieldType.DATA:
+                        block_data_range = field_range
+                        # Get next ECC range
+                        future_offset = field_offset
+                        block_list = list(block)
+                        for future_field in block_list[field_index:]:
+                            if future_field.field_type == FlashFieldType.ECC:
+                                block_ecc_range = Range(
+                                    future_offset, future_offset + future_field.size
+                                )
+                            future_offset += future_field.size
+
+                    if block_ecc_range is not None:
+                        # Try decoding/correcting with ECC, report any error
+                        try:
+                            # Assumes that data comes before ECC
+                            if (ecc_attr is not None) and (ecc_attr.ecc_class is not None):
+                                only_data.extend(
+                                    ecc_attr.ecc_class.decode(
+                                        block_data[: block_ecc_range.end]
+                                    )[block_data_range.start : block_data_range.end]
+                                )
+                            else:
+                                raise UnpackerError(
+                                    "Tried to correct with ECC without providing an ecc_class in FlashEccAttributes"
+                                )
+                        except EccError:
+                            raise UnpackerError("ECC correction failed")
+                    else:
+                        # No ECC found in block, just add the data directly
+                        only_data.extend(
+                            block_data[block_data_range.start : block_data_range.end]
+                        )
+                    field_offset += field.size
+                offset += block_size
+            if not only_data:
+                only_data = bytearray(data)
         # Add all block data to logical resource for recursive unpacking
         await oob_resource.create_child(
             tags=(FlashLogicalDataResource,),
-            data=b"".join(only_data) if only_data else data,
+            data=only_data,
             attributes=[
                 flash_attr,
             ],
         )
         if ecc_attr is not None:
             await oob_resource.create_child(
                 tags=(FlashLogicalEccResource,),
-                data=b"".join(only_ecc),
+                data=only_ecc,
                 attributes=[
                     ecc_attr,
                 ],