diff --git a/disassemblers/ofrak_capstone/ofrak_capstone/components.py b/disassemblers/ofrak_capstone/ofrak_capstone/components.py index e14bf097a..82a794f54 100644 --- a/disassemblers/ofrak_capstone/ofrak_capstone/components.py +++ b/disassemblers/ofrak_capstone/ofrak_capstone/components.py @@ -1,8 +1,9 @@ import asyncio import logging -from ofrak.component.unpacker import UnpackerError -from ofrak_type.architecture import InstructionSetMode +from ofrak.component.unpacker import Unpacker, UnpackerError +from ofrak.core.code_region import CodeRegion +from ofrak_type.architecture import InstructionSet, InstructionSetMode from ofrak.core.addressable import Addressable from ofrak.core.architecture import ProgramAttributes from ofrak.core.basic_block import BasicBlock, BasicBlockUnpacker @@ -24,6 +25,88 @@ LOGGER = logging.getLogger(__name__) +class CapstoneCodeRegionUnpacker(Unpacker[None]): + """For use when no other disassembler is available. + Disassembles all data as instructions in a CodeRegion using capstone. + Does not analyze code flow. Will not be able to differentiate between ARM and Thumb. + """ + + id = b"CodeRegionUnpacker" + targets = (CodeRegion,) + children = (Instruction,) + + def __init__( + self, + resource_factory: ResourceFactory, + data_service: DataServiceInterface, + resource_service: ResourceServiceInterface, + component_locator: ComponentLocatorInterface, + disassembler_service: DisassemblerServiceInterface, + ): + super().__init__(resource_factory, data_service, resource_service, component_locator) + self._disassembler_service = disassembler_service + + async def unpack(self, resource: Resource, config=None): + code_region_view = await resource.view_as(CodeRegion) + code_region_data = await resource.get_data() + if resource.has_attributes(ProgramAttributes): + program_attrs = resource.get_attributes(ProgramAttributes) + else: + program_attrs = await resource.analyze(ProgramAttributes) + + if program_attrs.isa in (InstructionSet.AARCH64, InstructionSet.ARM): + logging.critical( + """Capstone CodeRegion Unpacker being used on ARM Binary! + Will not be able to parse Thumb Switches! + Use of another backend is highly suggested. + """ + ) + if program_attrs.isa is InstructionSet.PPC: + logging.warning( + """Capstone CodeRegion Unpacker being used on PPC binary. + If you beleive your binary has VLE instructions, it is suggested + that you use an aditional backend. + """ + ) + + disassemble_request = DisassemblerServiceRequest( + program_attrs.isa, + program_attrs.sub_isa, + program_attrs.bit_width, + program_attrs.endianness, + program_attrs.processor, + None, + code_region_data, + code_region_view.virtual_address, + ) + instruction_children_created = [] + + for disassem_result in await self._disassembler_service.disassemble(disassemble_request): + instruction_view = Instruction( + disassem_result.address, + disassem_result.size, + f"{disassem_result.mnemonic} {disassem_result.operands}", + disassem_result.mnemonic, + disassem_result.operands, + None, + ) + instruction_view = Instruction( + disassem_result.address, + disassem_result.size, + f"{disassem_result.mnemonic} {disassem_result.operands}", + disassem_result.mnemonic, + disassem_result.operands, + None, + ) + + instruction_children_created.append( + code_region_view.create_child_region( + instruction_view, additional_attributes=(program_attrs,) + ) + ) + await asyncio.gather(*instruction_children_created) + + class CapstoneBasicBlockUnpacker(BasicBlockUnpacker): def __init__( self,