Skip to content

Commit

Permalink
introduce TAC
Browse files Browse the repository at this point in the history
  • Loading branch information
kokifish committed Dec 22, 2024
1 parent 08242d1 commit 50ebf6c
Show file tree
Hide file tree
Showing 16 changed files with 132 additions and 110 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ tmp_app_extract*
local_readme.md
resources.index
poetry.lock
.VSCodeCounter/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ python ohre_demo.py xxx.app # run demo with HarmonyOS app
python ohre_demo.py xxx.hap --resource_analysis # run demo with HarmonyOS hap and resource analysis
```

### ArkTS Reverse

#### Non-Logical Code

```bash
python examples\abc_decompile.py name.abc
```

#### Logical Code

```bash
python examples\dis_demo.py xxx.abc.dis # put isa.yaml from arkcompiler_ets_runtime to ./ohre/abcre/dis/isa.yaml
```



## Contacts

Please open an issue, participate in the discussion, or make a PR.
12 changes: 6 additions & 6 deletions examples/dis_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
# print(f">> {asmstr}")

# === reverse truly START
# print(f">> before ControlFlow build {dis_file.methods[0].debug_deep()}")
# dis_file.methods[0].split_native_code_block()
# print(f">> after ControlFlow build {dis_file.methods[0].debug_deep()}")
print(f">> before ControlFlow build {dis_file.methods[0].debug_deep()}")
dis_file.methods[0].split_native_code_block()
print(f">> after ControlFlow build {dis_file.methods[0].debug_deep()}")

for asm_method in dis_file.methods:
asm_method.split_native_code_block()
print(f">> CFed: {asm_method.debug_deep()}")
# for asm_method in dis_file.methods:
# asm_method.split_native_code_block()
# print(f">> CFed: {asm_method.debug_deep()}")
18 changes: 9 additions & 9 deletions ohre/abcre/dis/AsmMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from ohre.abcre.dis.AsmTypes import AsmTypes
from ohre.abcre.dis.ControlFlow import ControlFlow
from ohre.abcre.dis.NAC_LV import NAC_LV
from ohre.abcre.dis.NACBlocks import NACBlocks
from ohre.abcre.dis.CODE_LV import CODE_LV
from ohre.abcre.dis.CodeBlocks import CodeBlocks
from ohre.misc import Log, utils


Expand All @@ -17,14 +17,14 @@ def __init__(self, slotNumberIdx, lines: List[str]):
self.class_func_name: str = ""
self.func_type: str = ""
self.args: List = list()
self.nac_blocks: NACBlocks | None = None
self.code_blocks: CodeBlocks | None = None
insts = self._process_method(lines)
self.nac_blocks = NACBlocks(insts)
self.code_blocks = CodeBlocks(insts)

def split_native_code_block(self):
assert self.nac_blocks.IR_lv == NAC_LV.NATIVE
self.nac_blocks = ControlFlow.split_native_code_block(self.nac_blocks)
self.nac_blocks.IR_lv = NAC_LV.NATIVE_BLOCK_SPLITED
assert self.code_blocks.IR_lv == CODE_LV.NATIVE
self.code_blocks = ControlFlow.split_native_code_block(self.code_blocks)
self.code_blocks.IR_lv = CODE_LV.NATIVE_BLOCK_SPLITED

def _process_1st_line(self, line: str):
parts = line.split(" ")
Expand Down Expand Up @@ -96,9 +96,9 @@ def __str__(self):
def debug_short(self) -> str:
out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} ret {self.return_type} \
file: {self.file_name}\n\
args({len(self.args)}) {self.args} nac_blocks({len(self.nac_blocks)})"
args({len(self.args)}) {self.args} code_blocks({len(self.code_blocks)})"
return out

def debug_deep(self) -> str:
out = f"{self.debug_short()}\n{self.nac_blocks.debug_deep()}"
out = f"{self.debug_short()}\n{self.code_blocks.debug_deep()}"
return out
4 changes: 2 additions & 2 deletions ohre/abcre/dis/NAC_LV.py → ohre/abcre/dis/CODE_LV.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from ohre.abcre.enum.BaseEnum import BaseEnum


class NAC_LV(BaseEnum):
class CODE_LV(BaseEnum):
def __init__(self):
super().__init__()
NATIVE = 0
NATIVE_BLOCK_SPLITED = 1
IR_LV1 = 2
TAC = 2
IR_LV2 = 3
40 changes: 40 additions & 0 deletions ohre/abcre/dis/CodeBlock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import copy
from typing import Any, Dict, Iterable, List, Tuple

from ohre.abcre.dis.NAC import NAC
from ohre.abcre.dis.TAC import TAC
from ohre.abcre.dis.NACTYPE import NACTYPE


class CodeBlock():  # asm instructions (NAC) contained
    """A straight-line sequence of instructions stored in ``self.insts``.

    The block is built either from ready-made ``NAC`` objects (which are
    deep-copied) or from raw per-instruction lists that are each passed to
    the ``NAC`` constructor.
    """

    # The annotation is quoted so it is not evaluated at definition time:
    # ``List[...] | List[...]`` raises TypeError on Python 3.9.
    def __init__(self, in_l: "List[List[str]] | List[NAC]"):
        """Build the block from ``in_l``.

        Args:
            in_l: either a list of ``NAC`` objects or a list of non-empty
                lists, each of which is handed to ``NAC(...)``. An empty
                list yields an empty block.
        """
        self.insts: List[NAC] | List[TAC] = list()
        # Guard on emptiness first: an empty slice is a valid (empty) block
        # and must not fail on ``in_l[0]``.
        if in_l and isinstance(in_l[0], NAC):  # NAC in list
            self.insts = copy.deepcopy(in_l)
        else:  # maybe list in list; anyway, try to init NAC from each element
            for inst in in_l:
                assert len(inst) > 0
                self.insts.append(NAC(inst))

    def get_slice_block(self, idx_start: int, idx_end: int):
        """Return a new, independent CodeBlock holding ``insts[idx_start:idx_end]``."""
        # The constructor already deep-copies NAC lists, so the slice is
        # passed as-is instead of being copied twice.
        return CodeBlock(self.insts[idx_start:idx_end])

    def __str__(self):
        return self.debug_short()

    def __len__(self):
        """Number of instructions in the block."""
        return len(self.insts)

    def debug_short(self):
        """One-line summary: the instruction count."""
        return f"CodeBlock: insts {len(self.insts)}"

    def debug_deep(self):
        """Summary line followed by one line per instruction.

        Label instructions are separated from their index by a space, every
        other instruction by a tab.
        """
        lines = [f"CodeBlock: insts {len(self.insts)}"]
        for i, inst in enumerate(self.insts):
            sep = " " if inst.type == NACTYPE.LABEL else "\t"
            lines.append(f"{i}{sep}{inst.debug_deep()}")
        return "\n".join(lines).strip()
39 changes: 39 additions & 0 deletions ohre/abcre/dis/CodeBlocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import copy
from typing import Any, Dict, Iterable, List, Tuple

from ohre.abcre.dis.NAC import NAC
from ohre.abcre.dis.CODE_LV import CODE_LV
from ohre.abcre.dis.CodeBlock import CodeBlock
from ohre.abcre.dis.NACTYPE import NACTYPE


class CodeBlocks():  # CodeBlock container; control flow for one method is built inside a single CodeBlocks
    """Ordered list of ``CodeBlock`` objects plus the current IR level.

    ``IR_lv`` starts at ``CODE_LV.NATIVE``; callers update it after they
    transform the blocks.
    """

    # The annotation is quoted so it is not evaluated at definition time:
    # ``List[...] | List[...]`` raises TypeError on Python 3.9.
    def __init__(self, in_l: "List[List[str]] | List[CodeBlock]"):
        """Build the container from ``in_l``.

        Args:
            in_l: either a list of ``CodeBlock`` objects (deep-copied) or a
                list of raw instruction lists, which is wrapped into one
                single ``CodeBlock``. An empty list yields a container with
                no blocks.
        """
        self.blocks: List[CodeBlock] = list()
        self.IR_lv = CODE_LV.NATIVE  # native

        if not in_l:
            # Nothing to wrap; avoids indexing ``in_l[0]`` on an empty list.
            return
        if isinstance(in_l[0], CodeBlock):  # CodeBlock in list
            self.blocks = copy.deepcopy(in_l)
        else:  # maybe list(str) in list; try to init one CodeBlock from the asm code str lists
            self.blocks = [CodeBlock(in_l)]

    def __str__(self):
        return self.debug_short()

    @property
    def len(self):
        """Number of blocks; same value as ``len(self)``."""
        return len(self.blocks)

    def __len__(self):
        return len(self.blocks)

    def debug_short(self):
        """One-line summary: block count and the name of the current IR level."""
        return f"CodeBlocks: blocks({len(self.blocks)}) {CODE_LV.get_code_name(self.IR_lv)}"

    def debug_deep(self):
        """Summary line followed by the deep dump of every block, each newline-terminated."""
        total = len(self.blocks)
        body = "".join(
            f"[{i}/{total}]-block: {block.debug_deep()}\n"
            for i, block in enumerate(self.blocks)
        )
        return f"{self.debug_short()}\n{body}"
14 changes: 7 additions & 7 deletions ohre/abcre/dis/ControlFlow.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from ohre.abcre.dis.NACBlock import NACBlock
from ohre.abcre.dis.NACBlocks import NACBlocks
from ohre.abcre.dis.CodeBlock import CodeBlock
from ohre.abcre.dis.CodeBlocks import CodeBlocks
from ohre.abcre.dis.NACTYPE import NACTYPE
from ohre.misc import Log, utils


class ControlFlow():
def split_native_code_block(blocks: NACBlocks) -> NACBlocks:
def split_native_code_block(blocks: CodeBlocks) -> CodeBlocks:
assert len(blocks) == 1
nac_block = blocks.nac_blocks[0]
nac_block = blocks.blocks[0] # should only have one NAC block, not TAC
delimited_id: list = list()
for i in range(len(nac_block)):
nac = nac_block.nacs[i]
nac = nac_block.insts[i]
if (nac.type == NACTYPE.LABEL):
delimited_id.append(i)
elif (nac.type == NACTYPE.COND_JMP or nac.type == NACTYPE.UNCN_JMP or nac.type == NACTYPE.RETURN):
Expand All @@ -22,7 +22,7 @@ def split_native_code_block(blocks: NACBlocks) -> NACBlocks:
debug_out = ""
for idx in delimited_id:
if (idx < len(nac_block)):
debug_out += f"{idx}-{nac_block.nacs[idx]}; "
debug_out += f"{idx}-{nac_block.insts[idx]}; "
else:
debug_out += f"{idx} nac_block len {len(nac_block)}"
Log.info(f"[ControlFlow] delimited id-nac {debug_out}", False)
Expand All @@ -33,4 +33,4 @@ def split_native_code_block(blocks: NACBlocks) -> NACBlocks:
idx_end = delimited_id[i]
final_nac_blocks.append(nac_block.get_slice_block(idx_start, idx_end))
idx_start = idx_end
return NACBlocks(final_nac_blocks)
return CodeBlocks(final_nac_blocks)
1 change: 0 additions & 1 deletion ohre/abcre/dis/ISA.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ def get_opstr_info_dict(self, opstr: str) -> Dict | None:

if __name__ == "__main__":
ohre.set_log_print(True)
d = utils.read_dict_from_yaml_file(os.path.join(os.path.dirname(os.path.abspath(__file__)), "isa.yaml"))
isa = ISA(os.path.join(os.path.dirname(os.path.abspath(__file__)), "isa.yaml"))
# print(json.dumps(isa.ori_d["groups"], indent=4))
assert isa.get_opcodes("deprecated.getiteratornext") == [0xfc02]
Expand Down
7 changes: 0 additions & 7 deletions ohre/abcre/dis/NAC.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@ def __init__(self, op_args: List[str]):
def __str__(self):
return self.debug_short()

def _is_std_nac(self):
std_nac_set = {NACTYPE.ASSIGN, NACTYPE.COND_JMP, NACTYPE.UNCN_JMP,
NACTYPE.CALL, NACTYPE.COND_THROW, NACTYPE.UNCN_THROW, NACTYPE.RETURN}
if (self.type in std_nac_set):
return True
return False

def debug_short(self):
out = f"{self.op} "
for i in range(len(self.args)):
Expand Down
39 changes: 0 additions & 39 deletions ohre/abcre/dis/NACBlock.py

This file was deleted.

39 changes: 0 additions & 39 deletions ohre/abcre/dis/NACBlocks.py

This file was deleted.

1 change: 1 addition & 0 deletions ohre/abcre/dis/NACTYPE.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def init_from_ISAyaml(cls, yaml_path: str):
# "mov", "return", "ldobjbyname", "jeqz", "jnez", "jstricteq", "jnstricteq", "throw", "throw.notexists",
# "throw.ifnotobject"]:
# print(f"inst {inst}: {NACTYPE.get_code_name(NACTYPE.get_NAC_type(inst))}")
print(f"op total count: {len(NACTYPE.isa.opstr2infod)}")
for inst in NACTYPE.isa.opstr2infod.keys():
print(f"inst {inst}: {NACTYPE.get_code_name(NACTYPE.get_NAC_type(inst))}")
assert NACTYPE.get_code_name(NACTYPE.get_NAC_type(inst)) != "UNKNOWN"
Empty file added ohre/abcre/dis/NativeToTAC.py
Empty file.
10 changes: 10 additions & 0 deletions ohre/abcre/dis/TAC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from typing import Any, Dict, Iterable, List, Tuple

from ohre.abcre.dis.NACTYPE import NACTYPE


class TAC():  # Three Address Code
    """A single three-address-code instruction: an operation type plus its arguments."""

    def __init__(self, optype, op_args: List):
        """Store the operation type and its arguments.

        Args:
            optype: the operation type of this instruction.
            op_args: the arguments of the operation; copied into a new list
                so later changes to the caller's list do not affect this
                instruction. ``None`` is treated as "no arguments".
        """
        self.optype = optype
        # Previously op_args was discarded and args was always None.
        self.args: List = list(op_args) if op_args is not None else []
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ python = "^3.9"
"yara-python" = "^4.5.0"
pendulum = "^3.0.0"
leb128 = "^1.0.6"
pyyaml = "^5.4.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down

0 comments on commit 50ebf6c

Please sign in to comment.