From 50ebf6c5dfbe60ccfd6e03d388c45b73076635e3 Mon Sep 17 00:00:00 2001 From: koki Date: Sun, 22 Dec 2024 16:54:12 +0800 Subject: [PATCH] introduce TAC --- .gitignore | 1 + README.md | 16 ++++++++++ examples/dis_demo.py | 12 +++---- ohre/abcre/dis/AsmMethod.py | 18 +++++------ ohre/abcre/dis/{NAC_LV.py => CODE_LV.py} | 4 +-- ohre/abcre/dis/CodeBlock.py | 40 ++++++++++++++++++++++++ ohre/abcre/dis/CodeBlocks.py | 39 +++++++++++++++++++++++ ohre/abcre/dis/ControlFlow.py | 14 ++++----- ohre/abcre/dis/ISA.py | 1 - ohre/abcre/dis/NAC.py | 7 ----- ohre/abcre/dis/NACBlock.py | 39 ----------------------- ohre/abcre/dis/NACBlocks.py | 39 ----------------------- ohre/abcre/dis/NACTYPE.py | 1 + ohre/abcre/dis/NativeToTAC.py | 0 ohre/abcre/dis/TAC.py | 10 ++++++ pyproject.toml | 1 + 16 files changed, 132 insertions(+), 110 deletions(-) rename ohre/abcre/dis/{NAC_LV.py => CODE_LV.py} (80%) create mode 100644 ohre/abcre/dis/CodeBlock.py create mode 100644 ohre/abcre/dis/CodeBlocks.py delete mode 100644 ohre/abcre/dis/NACBlock.py delete mode 100644 ohre/abcre/dis/NACBlocks.py create mode 100644 ohre/abcre/dis/NativeToTAC.py create mode 100644 ohre/abcre/dis/TAC.py diff --git a/.gitignore b/.gitignore index 836191b..a886f80 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ tmp_app_extract* local_readme.md resources.index poetry.lock +.VSCodeCounter/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/README.md b/README.md index 78899e6..85c3a5c 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,22 @@ python ohre_demo.py xxx.app # run demo with HarmonyOS app python ohre_demo.py xxx.hap --resource_analysis # run demo with HarmonyOS hap and resource analysis ``` +### ArkTS Reverse + +#### Non-Logical Code + +```bash +python examples/abc_decompile.py name.abc +``` + +#### Logical Code + +```bash +python examples/dis_demo.py xxx.abc.dis # put isa.yaml from arkcompiler_ets_runtime to ./ohre/abcre/dis/isa.yaml +``` + + + ## Contacts Please new an issue, participate
in the discussion or make a PR. diff --git a/examples/dis_demo.py b/examples/dis_demo.py index aafebbb..dfcb5db 100644 --- a/examples/dis_demo.py +++ b/examples/dis_demo.py @@ -25,10 +25,10 @@ # print(f">> {asmstr}") # === reverse truly START - # print(f">> before ControlFlow build {dis_file.methods[0].debug_deep()}") - # dis_file.methods[0].split_native_code_block() - # print(f">> after ControlFlow build {dis_file.methods[0].debug_deep()}") + print(f">> before ControlFlow build {dis_file.methods[0].debug_deep()}") + dis_file.methods[0].split_native_code_block() + print(f">> after ControlFlow build {dis_file.methods[0].debug_deep()}") - for asm_method in dis_file.methods: - asm_method.split_native_code_block() - print(f">> CFed: {asm_method.debug_deep()}") + # for asm_method in dis_file.methods: + # asm_method.split_native_code_block() + # print(f">> CFed: {asm_method.debug_deep()}") diff --git a/ohre/abcre/dis/AsmMethod.py b/ohre/abcre/dis/AsmMethod.py index 10b06db..5e6e054 100644 --- a/ohre/abcre/dis/AsmMethod.py +++ b/ohre/abcre/dis/AsmMethod.py @@ -2,8 +2,8 @@ from ohre.abcre.dis.AsmTypes import AsmTypes from ohre.abcre.dis.ControlFlow import ControlFlow -from ohre.abcre.dis.NAC_LV import NAC_LV -from ohre.abcre.dis.NACBlocks import NACBlocks +from ohre.abcre.dis.CODE_LV import CODE_LV +from ohre.abcre.dis.CodeBlocks import CodeBlocks from ohre.misc import Log, utils @@ -17,14 +17,14 @@ def __init__(self, slotNumberIdx, lines: List[str]): self.class_func_name: str = "" self.func_type: str = "" self.args: List = list() - self.nac_blocks: NACBlocks | None = None + self.code_blocks: CodeBlocks | None = None insts = self._process_method(lines) - self.nac_blocks = NACBlocks(insts) + self.code_blocks = CodeBlocks(insts) def split_native_code_block(self): - assert self.nac_blocks.IR_lv == NAC_LV.NATIVE - self.nac_blocks = ControlFlow.split_native_code_block(self.nac_blocks) - self.nac_blocks.IR_lv = NAC_LV.NATIVE_BLOCK_SPLITED + assert self.code_blocks.IR_lv == 
CODE_LV.NATIVE + self.code_blocks = ControlFlow.split_native_code_block(self.code_blocks) + self.code_blocks.IR_lv = CODE_LV.NATIVE_BLOCK_SPLITED def _process_1st_line(self, line: str): parts = line.split(" ") @@ -96,9 +96,9 @@ def __str__(self): def debug_short(self) -> str: out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} ret {self.return_type} \ file: {self.file_name}\n\ -args({len(self.args)}) {self.args} nac_blocks({len(self.nac_blocks)})" +args({len(self.args)}) {self.args} code_blocks({len(self.code_blocks)})" return out def debug_deep(self) -> str: - out = f"{self.debug_short()}\n{self.nac_blocks.debug_deep()}" + out = f"{self.debug_short()}\n{self.code_blocks.debug_deep()}" return out diff --git a/ohre/abcre/dis/NAC_LV.py b/ohre/abcre/dis/CODE_LV.py similarity index 80% rename from ohre/abcre/dis/NAC_LV.py rename to ohre/abcre/dis/CODE_LV.py index e8fe35e..2a559e1 100644 --- a/ohre/abcre/dis/NAC_LV.py +++ b/ohre/abcre/dis/CODE_LV.py @@ -1,10 +1,10 @@ from ohre.abcre.enum.BaseEnum import BaseEnum -class NAC_LV(BaseEnum): +class CODE_LV(BaseEnum): def __init__(self): super().__init__() NATIVE = 0 NATIVE_BLOCK_SPLITED = 1 - IR_LV1 = 2 + TAC = 2 IR_LV2 = 3 diff --git a/ohre/abcre/dis/CodeBlock.py b/ohre/abcre/dis/CodeBlock.py new file mode 100644 index 0000000..ce3db6a --- /dev/null +++ b/ohre/abcre/dis/CodeBlock.py @@ -0,0 +1,40 @@ +import copy +from typing import Any, Dict, Iterable, List, Tuple + +from ohre.abcre.dis.NAC import NAC +from ohre.abcre.dis.TAC import TAC +from ohre.abcre.dis.NACTYPE import NACTYPE + + +class CodeBlock(): # asm instruction(NAC) cantained + def __init__(self, in_l: List[List[str]] | List[NAC] | List[NAC]): + assert len(in_l) >= 0 + self.insts: List[NAC] | List[TAC] = list() + if (isinstance(in_l[0], NAC)): # NAC in list + self.insts = copy.deepcopy(in_l) + else: # maybe list in list # anyway, try init NAC using element in list + for inst in in_l: + assert len(inst) > 0 + self.insts.append(NAC(inst)) 
+ + def get_slice_block(self, idx_start: int, idx_end: int): + return CodeBlock(copy.deepcopy(self.insts[idx_start: idx_end])) + + def __str__(self): + return self.debug_short() + + def __len__(self): + return len(self.insts) + + def debug_short(self): + out = f"CodeBlock: insts {len(self.insts)}" + return out + + def debug_deep(self): + out = f"CodeBlock: insts {len(self.insts)}\n" + for i in range(len(self.insts)): + if (self.insts[i].type == NACTYPE.LABEL): + out += f"{i} {self.insts[i].debug_deep()}\n" + else: + out += f"{i}\t{self.insts[i].debug_deep()}\n" + return out.strip() diff --git a/ohre/abcre/dis/CodeBlocks.py b/ohre/abcre/dis/CodeBlocks.py new file mode 100644 index 0000000..8cb000f --- /dev/null +++ b/ohre/abcre/dis/CodeBlocks.py @@ -0,0 +1,39 @@ +import copy +from typing import Any, Dict, Iterable, List, Tuple + +from ohre.abcre.dis.NAC import NAC +from ohre.abcre.dis.CODE_LV import CODE_LV +from ohre.abcre.dis.CodeBlock import CodeBlock +from ohre.abcre.dis.NACTYPE import NACTYPE + + +class CodeBlocks(): # NAC block contained, build control flow graph inside a single CodeBlocks for one method + def __init__(self, in_l: List[List[str]] | List[CodeBlock]): + assert len(in_l) >= 0 + self.blocks: List[CodeBlock] = list() + self.IR_lv = CODE_LV.NATIVE # native + + if (isinstance(in_l[0], CodeBlock)): # CodeBlock in list + self.blocks = copy.deepcopy(in_l) + else: # maybe list(str) in list # anyway, try init CodeBlock using element(asm codea str list) in list + self.blocks: List[CodeBlock] = [CodeBlock(in_l)] + + def __str__(self): + return self.debug_short() + + @property + def len(self): + return len(self.blocks) + + def __len__(self): + return len(self.blocks) + + def debug_short(self): + out = f"CodeBlocks: blocks({len(self.blocks)}) {CODE_LV.get_code_name(self.IR_lv)}" + return out + + def debug_deep(self): + out = f"{self.debug_short()}\n" + for i in range(len(self.blocks)): + out += f"[{i}/{len(self.blocks)}]-block: 
{self.blocks[i].debug_deep()}\n" + return out diff --git a/ohre/abcre/dis/ControlFlow.py b/ohre/abcre/dis/ControlFlow.py index cda0e47..0d4336d 100644 --- a/ohre/abcre/dis/ControlFlow.py +++ b/ohre/abcre/dis/ControlFlow.py @@ -1,16 +1,16 @@ -from ohre.abcre.dis.NACBlock import NACBlock -from ohre.abcre.dis.NACBlocks import NACBlocks +from ohre.abcre.dis.CodeBlock import CodeBlock +from ohre.abcre.dis.CodeBlocks import CodeBlocks from ohre.abcre.dis.NACTYPE import NACTYPE from ohre.misc import Log, utils class ControlFlow(): - def split_native_code_block(blocks: NACBlocks) -> NACBlocks: + def split_native_code_block(blocks: CodeBlocks) -> CodeBlocks: assert len(blocks) == 1 - nac_block = blocks.nac_blocks[0] + nac_block = blocks.blocks[0] # should only have one NAC block, not TAC delimited_id: list = list() for i in range(len(nac_block)): - nac = nac_block.nacs[i] + nac = nac_block.insts[i] if (nac.type == NACTYPE.LABEL): delimited_id.append(i) elif (nac.type == NACTYPE.COND_JMP or nac.type == NACTYPE.UNCN_JMP or nac.type == NACTYPE.RETURN): @@ -22,7 +22,7 @@ def split_native_code_block(blocks: NACBlocks) -> NACBlocks: debug_out = "" for idx in delimited_id: if (idx < len(nac_block)): - debug_out += f"{idx}-{nac_block.nacs[idx]}; " + debug_out += f"{idx}-{nac_block.insts[idx]}; " else: debug_out += f"{idx} nac_block len {len(nac_block)}" Log.info(f"[ControlFlow] delimited id-nac {debug_out}", False) @@ -33,4 +33,4 @@ def split_native_code_block(blocks: NACBlocks) -> NACBlocks: idx_end = delimited_id[i] final_nac_blocks.append(nac_block.get_slice_block(idx_start, idx_end)) idx_start = idx_end - return NACBlocks(final_nac_blocks) + return CodeBlocks(final_nac_blocks) diff --git a/ohre/abcre/dis/ISA.py b/ohre/abcre/dis/ISA.py index 08bd72f..d4f0510 100644 --- a/ohre/abcre/dis/ISA.py +++ b/ohre/abcre/dis/ISA.py @@ -107,7 +107,6 @@ def get_opstr_info_dict(self, opstr: str) -> Dict | None: if __name__ == "__main__": ohre.set_log_print(True) - d = 
utils.read_dict_from_yaml_file(os.path.join(os.path.dirname(os.path.abspath(__file__)), "isa.yaml")) isa = ISA(os.path.join(os.path.dirname(os.path.abspath(__file__)), "isa.yaml")) # print(json.dumps(isa.ori_d["groups"], indent=4)) assert isa.get_opcodes("deprecated.getiteratornext") == [0xfc02] diff --git a/ohre/abcre/dis/NAC.py b/ohre/abcre/dis/NAC.py index 01225f2..f956848 100644 --- a/ohre/abcre/dis/NAC.py +++ b/ohre/abcre/dis/NAC.py @@ -20,13 +20,6 @@ def __init__(self, op_args: List[str]): def __str__(self): return self.debug_short() - def _is_std_nac(self): - std_nac_set = {NACTYPE.ASSIGN, NACTYPE.COND_JMP, NACTYPE.UNCN_JMP, - NACTYPE.CALL, NACTYPE.COND_THROW, NACTYPE.UNCN_THROW, NACTYPE.RETURN} - if (self.type in std_nac_set): - return True - return False - def debug_short(self): out = f"{self.op} " for i in range(len(self.args)): diff --git a/ohre/abcre/dis/NACBlock.py b/ohre/abcre/dis/NACBlock.py deleted file mode 100644 index a8ece0a..0000000 --- a/ohre/abcre/dis/NACBlock.py +++ /dev/null @@ -1,39 +0,0 @@ -import copy -from typing import Any, Dict, Iterable, List, Tuple - -from ohre.abcre.dis.NAC import NAC -from ohre.abcre.dis.NACTYPE import NACTYPE - - -class NACBlock(): # asm instruction(NAC) cantained - def __init__(self, in_l: List[List[str]] | List[NAC]): - assert len(in_l) >= 0 - self.nacs: List[NAC] = list() - if (isinstance(in_l[0], NAC)): # NAC in list - self.nacs = copy.deepcopy(in_l) - else: # maybe list in list # anyway, try init NAC using element in list - for inst in in_l: - assert len(inst) > 0 - self.nacs.append(NAC(inst)) - - def get_slice_block(self, idx_start: int, idx_end: int): - return NACBlock(copy.deepcopy(self.nacs[idx_start: idx_end])) - - def __str__(self): - return self.debug_short() - - def __len__(self): - return len(self.nacs) - - def debug_short(self): - out = f"NACBlock: nacs {len(self.nacs)}" - return out - - def debug_deep(self): - out = f"NACBlock: nacs {len(self.nacs)}\n" - for i in range(len(self.nacs)): - if 
(self.nacs[i].type == NACTYPE.LABEL): - out += f"{i} {self.nacs[i].debug_deep()}\n" - else: - out += f"{i}\t{self.nacs[i].debug_deep()}\n" - return out.strip() diff --git a/ohre/abcre/dis/NACBlocks.py b/ohre/abcre/dis/NACBlocks.py deleted file mode 100644 index 72b1f8c..0000000 --- a/ohre/abcre/dis/NACBlocks.py +++ /dev/null @@ -1,39 +0,0 @@ -import copy -from typing import Any, Dict, Iterable, List, Tuple - -from ohre.abcre.dis.NAC import NAC -from ohre.abcre.dis.NAC_LV import NAC_LV -from ohre.abcre.dis.NACBlock import NACBlock -from ohre.abcre.dis.NACTYPE import NACTYPE - - -class NACBlocks(): # NAC block contained, build control flow graph inside a single NACBlocks for one method - def __init__(self, in_l: List[List[str]] | List[NACBlock]): - assert len(in_l) >= 0 - self.nac_blocks: List[NACBlock] = list() - self.IR_lv = NAC_LV.NATIVE # native - - if (isinstance(in_l[0], NACBlock)): # NACBlock in list - self.nac_blocks = copy.deepcopy(in_l) - else: # maybe list(str) in list # anyway, try init NACBlock using element(asm codea str list) in list - self.nac_blocks: List[NACBlock] = [NACBlock(in_l)] - - def __str__(self): - return self.debug_short() - - @property - def len(self): - return len(self.nac_blocks) - - def __len__(self): - return len(self.nac_blocks) - - def debug_short(self): - out = f"NACBlocks: nac block({len(self.nac_blocks)}) {NAC_LV.get_code_name(self.IR_lv)}" - return out - - def debug_deep(self): - out = f"{self.debug_short()}\n" - for i in range(len(self.nac_blocks)): - out += f"[{i}/{len(self.nac_blocks)}]-block: {self.nac_blocks[i].debug_deep()}\n" - return out diff --git a/ohre/abcre/dis/NACTYPE.py b/ohre/abcre/dis/NACTYPE.py index edea9fa..7ecce4b 100644 --- a/ohre/abcre/dis/NACTYPE.py +++ b/ohre/abcre/dis/NACTYPE.py @@ -106,6 +106,7 @@ def init_from_ISAyaml(cls, yaml_path: str): # "mov", "return", "ldobjbyname", "jeqz", "jnez", "jstricteq", "jnstricteq", "throw", "throw.notexists", # "throw.ifnotobject"]: # print(f"inst {inst}: 
{NACTYPE.get_code_name(NACTYPE.get_NAC_type(inst))}") + print(f"op total count: {len(NACTYPE.isa.opstr2infod)}") for inst in NACTYPE.isa.opstr2infod.keys(): print(f"inst {inst}: {NACTYPE.get_code_name(NACTYPE.get_NAC_type(inst))}") assert NACTYPE.get_code_name(NACTYPE.get_NAC_type(inst)) != "UNKNOWN" diff --git a/ohre/abcre/dis/NativeToTAC.py b/ohre/abcre/dis/NativeToTAC.py new file mode 100644 index 0000000..e69de29 diff --git a/ohre/abcre/dis/TAC.py b/ohre/abcre/dis/TAC.py new file mode 100644 index 0000000..a93510a --- /dev/null +++ b/ohre/abcre/dis/TAC.py @@ -0,0 +1,10 @@ +from typing import Any, Dict, Iterable, List, Tuple + +from ohre.abcre.dis.NACTYPE import NACTYPE + + +class TAC(): # Three Address Code + + def __init__(self, optype, op_args: List): + self.optype = optype + self.args = None diff --git a/pyproject.toml b/pyproject.toml index 16a69fa..fa9377a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ python = "^3.9" "yara-python" = "^4.5.0" pendulum = "^3.0.0" leb128 = "^1.0.6" +pyyaml = "^5.4.0" [build-system] requires = ["poetry-core>=1.0.0"]