Skip to content

Commit

Permalink
tac builder start
Browse files Browse the repository at this point in the history
  • Loading branch information
kokifish committed Dec 22, 2024
1 parent 50ebf6c commit 18bb3b4
Show file tree
Hide file tree
Showing 11 changed files with 182 additions and 28 deletions.
4 changes: 2 additions & 2 deletions examples/dis_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
# print(f">> {asmstr}")

# === reverse truly START
print(f">> before ControlFlow build {dis_file.methods[0].debug_deep()}")
# print(f">> before ControlFlow build {dis_file.methods[0].debug_deep()}")
dis_file.methods[0].split_native_code_block()
print(f">> after ControlFlow build {dis_file.methods[0].debug_deep()}")

dis_file.methods[0].native_code_to_TAC()
# for asm_method in dis_file.methods:
# asm_method.split_native_code_block()
# print(f">> CFed: {asm_method.debug_deep()}")
39 changes: 39 additions & 0 deletions ohre/abcre/dis/AsmArg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from ohre.abcre.dis.AsmTypes import AsmTypes
from ohre.misc import Log, utils


class AsmArg:
    """One operand of a disassembled instruction.

    Attributes:
        type: an ``AsmTypes`` member describing the operand kind.
        name: operand text as written, e.g. ``v0`` (the ``v`` prefix is kept).
        value: optional payload; may be filled in by later analysis.
        obj_ref: optional reference, printed after ``//`` by the debug helpers.
    """

    def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN, name="", value=None, obj_ref=None):
        self.type = arg_type
        # name: e.g. for v0, type is VAR, name is v0 (stored without truncating the prefix v)
        self.name = name
        # value: may be set in the subsequent analysis
        self.value = value
        self.obj_ref = obj_ref

    def __str__(self):
        return self.debug_short()

    @classmethod
    def build_arg(cls, s: str):
        """Build an operand from its textual form.

        ``acc`` maps to ``AsmTypes.ACC``, a ``v`` prefix to ``AsmTypes.VAR`` and an
        ``a`` prefix to ``AsmTypes.ARG``. Any other text is reported through
        ``Log.error`` and ``None`` is returned.

        Args:
            s: non-empty operand text, e.g. ``"v0"`` or ``"a1"``.

        Returns:
            A new instance of ``cls``, or ``None`` when the text is not recognised.
        """
        assert isinstance(s, str) and len(s) > 0
        # Must be tested before the "a" prefix: "acc" also starts with "a" and
        # would otherwise be classified as an argument register.
        if (s == "acc"):
            return cls(AsmTypes.ACC, s)
        if (s.startswith("v")):
            return cls(AsmTypes.VAR, s)
        if (s.startswith("a")):
            return cls(AsmTypes.ARG, s)
        Log.error(f"build_arg failed: s={s}")
        return None

    def is_value_valid(self) -> bool:  # TODO: for some types, value is not valid, judge it
        """Placeholder: not implemented yet, so it currently returns ``None``."""
        pass

    def debug_short(self):
        """Return ``<type name>-<name>``, then ``(value)`` and ``//obj_ref`` when they are set."""
        out = f"{AsmTypes.get_code_name(self.type)}-{self.name}"
        if (self.value is not None):
            out += f"({self.value})"
        if (self.obj_ref is not None):
            out += f"//{self.obj_ref}"
        return out

    def debug_deep(self):
        """Return the detailed description; currently identical to ``debug_short()``."""
        out = f"{self.debug_short()}"
        return out
18 changes: 12 additions & 6 deletions ohre/abcre/dis/AsmMethod.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import Any, Dict, Iterable, List, Tuple

from ohre.abcre.dis.AsmTypes import AsmTypes
from ohre.abcre.dis.ControlFlow import ControlFlow
from ohre.abcre.dis.CODE_LV import CODE_LV
from ohre.abcre.dis.CodeBlocks import CodeBlocks
from ohre.abcre.dis.NativeToTAC import NativeToTAC
from ohre.abcre.dis.ControlFlow import ControlFlow
from ohre.misc import Log, utils


Expand All @@ -22,9 +23,14 @@ def __init__(self, slotNumberIdx, lines: List[str]):
self.code_blocks = CodeBlocks(insts)

def split_native_code_block(self):
assert self.code_blocks.IR_lv == CODE_LV.NATIVE
assert self.code_blocks.level == CODE_LV.NATIVE
self.code_blocks = ControlFlow.split_native_code_block(self.code_blocks)
self.code_blocks.IR_lv = CODE_LV.NATIVE_BLOCK_SPLITED
self.code_blocks.set_level(CODE_LV.NATIVE_BLOCK_SPLITED)

def native_code_to_TAC(self):
    """Lift this method's code blocks from split native code to TAC level.

    Requires ``self.code_blocks`` to be at ``CODE_LV.NATIVE_BLOCK_SPLITED``
    (asserted). The blocks returned by ``NativeToTAC.native_code_to_TAC``
    replace ``self.code_blocks`` and are marked ``CODE_LV.TAC``.
    """
    assert self.code_blocks.level == CODE_LV.NATIVE_BLOCK_SPLITED
    tac_blocks = NativeToTAC.native_code_to_TAC(self.code_blocks)
    if tac_blocks is None:
        # Keep the existing blocks and their level instead of overwriting them
        # with None, which would make the set_level() call below fail.
        Log.error("native_code_to_TAC failed: converter returned no code blocks")
        return
    self.code_blocks = tac_blocks
    self.code_blocks.set_level(CODE_LV.TAC)

def _process_1st_line(self, line: str):
parts = line.split(" ")
Expand Down Expand Up @@ -65,7 +71,7 @@ def _process_method(self, lines: List[str]) -> List[List[str]]:
tu = [line]
insts.append(tu)
else:
Log.error(f"ERROR: {line} NOT tag?", True)
Log.error(f"ERROR: {line} NOT tag?")
elif (len(line) == 0): # skip empty line
continue
elif (line == "}"): # process END
Expand Down Expand Up @@ -94,8 +100,8 @@ def __str__(self):
return self.debug_short()

def debug_short(self) -> str:
out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} ret {self.return_type} \
file: {self.file_name}\n\
out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} \
ret {self.return_type} file: {self.file_name}\n\
args({len(self.args)}) {self.args} code_blocks({len(self.code_blocks)})"
return out

Expand Down
11 changes: 11 additions & 0 deletions ohre/abcre/dis/AsmTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@

class AsmTypes(BaseEnum):
uint_types = {"u8", "u16", "u32", "u64"}
ACC = "acc"
VAR = "v" # e.g. v0, v1, v2
ARG = "a" # e.g. a0, a1, a2
REG = "reg" # register
IMM = "imm" # AsmArg: value is the actual value of immediate number
NULL = "null" # AsmArg: value not valid
TRUE = "true" # AsmArg: value not valid
FALSE = "false" # AsmArg: value not valid
ZERO = "zero" # AsmArg: value not valid
UNDEFINED = "undefined"
UNKNOWN = "unknown" # default value in this proj

def __init__(self):
super().__init__()
Expand Down
10 changes: 5 additions & 5 deletions ohre/abcre/dis/CodeBlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
from typing import Any, Dict, Iterable, List, Tuple

from ohre.abcre.dis.NAC import NAC
from ohre.abcre.dis.TAC import TAC
from ohre.abcre.dis.NACTYPE import NACTYPE
from ohre.abcre.dis.TAC import TAC


class CodeBlock(): # asm instruction(NAC) contained
def __init__(self, in_l: List[List[str]] | List[NAC] | List[NAC]):
def __init__(self, in_l: List[List[str]] | List[NAC] | List[TAC]):
assert len(in_l) >= 0
self.insts: List[NAC] | List[TAC] = list()
if (isinstance(in_l[0], NAC)): # NAC in list
Expand All @@ -23,14 +23,14 @@ def get_slice_block(self, idx_start: int, idx_end: int):
def __str__(self):
return self.debug_short()

def __len__(self):
def __len__(self) -> int:
return len(self.insts)

def debug_short(self):
def debug_short(self) -> str:
out = f"CodeBlock: insts {len(self.insts)}"
return out

def debug_deep(self):
def debug_deep(self) -> str:
out = f"CodeBlock: insts {len(self.insts)}\n"
for i in range(len(self.insts)):
if (self.insts[i].type == NACTYPE.LABEL):
Expand Down
31 changes: 24 additions & 7 deletions ohre/abcre/dis/CodeBlocks.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import copy
from typing import Any, Dict, Iterable, List, Tuple

from ohre.abcre.dis.NAC import NAC
from ohre.abcre.dis.CODE_LV import CODE_LV
from ohre.abcre.dis.CodeBlock import CodeBlock
from ohre.abcre.dis.NAC import NAC
from ohre.abcre.dis.NACTYPE import NACTYPE
from ohre.misc import Log, utils


class CodeBlocks(): # NAC block contained, build control flow graph inside a single CodeBlocks for one method
def __init__(self, in_l: List[List[str]] | List[CodeBlock]):
def __init__(self, in_l: List[List[str]] | List[CodeBlock], ir_lv=CODE_LV.NATIVE):
assert len(in_l) >= 0
self.blocks: List[CodeBlock] = list()
self.IR_lv = CODE_LV.NATIVE # native
self.IR_level = ir_lv # default: from native

if (isinstance(in_l[0], CodeBlock)): # CodeBlock in list
self.blocks = copy.deepcopy(in_l)
Expand All @@ -25,14 +26,30 @@ def __str__(self):
def len(self):
return len(self.blocks)

def __len__(self):
@property
def level(self):
    """Return the code level currently stored in ``self.IR_level``."""
    return self.IR_level

@property
def level_str(self) -> str:
    """Return the name that ``CODE_LV.get_code_name`` gives for the current level."""
    return CODE_LV.get_code_name(self.IR_level)

def set_level(self, level):
    """Store ``level`` as the new code level unless that would lower it.

    Returns:
        True when ``level`` was stored (it is not below the current level);
        False, after a warning is logged, when the request was rejected.
    """
    would_lower = not (level >= self.IR_level)
    if would_lower:
        Log.warn(f"[CodeBlocks] cannot lowering level, level {level} ori {self.IR_level}")
        return False
    self.IR_level = level
    return True

def __len__(self) -> int:
return len(self.blocks)

def debug_short(self):
out = f"CodeBlocks: blocks({len(self.blocks)}) {CODE_LV.get_code_name(self.IR_lv)}"
def debug_short(self) -> str:
out = f"CodeBlocks: blocks({len(self.blocks)}) {self.level_str}"
return out

def debug_deep(self):
def debug_deep(self) -> str:
out = f"{self.debug_short()}\n"
for i in range(len(self.blocks)):
out += f"[{i}/{len(self.blocks)}]-block: {self.blocks[i].debug_deep()}\n"
Expand Down
4 changes: 2 additions & 2 deletions ohre/abcre/dis/DisFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ def __init__(self, value):
self.lines.append(line)
file.close()
else:
Log.error(f"DisFile init ERROR: value type NOT supported, {type(value)} {value}", True)
Log.error(f"DisFile init ERROR: value type NOT supported, {type(value)} {value}")
self._dis_process_main()

def _dis_process_main(self):
l_n = 0 # line number
state = STATE.INIT
while (l_n < len(self.lines)):
Log.info(f"DisFile processing: state {state} line-{l_n}: {self.lines[l_n].rstrip()}", True)
Log.info(f"DisFile processing: state {state} line-{l_n}: {self.lines[l_n].rstrip()}")
if (state == STATE.INIT):
state, l_n = self._read_disheader(l_n)
elif (state == STATE.NEW_SEC):
Expand Down
39 changes: 39 additions & 0 deletions ohre/abcre/dis/NativeToTAC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from ohre.abcre.dis.NAC import NAC
from ohre.abcre.dis.TAC import TAC
from ohre.abcre.dis.AsmArg import AsmArg
from ohre.abcre.dis.CODE_LV import CODE_LV
from ohre.abcre.dis.CodeBlocks import CodeBlocks
from ohre.abcre.dis.AsmTypes import AsmTypes
from ohre.misc import Log, utils


class NativeToTAC:
    """Translate native assembly instructions (NAC) into three-address code (TAC)."""

    @classmethod
    def toTAC(cls, nac: NAC) -> TAC:
        """Translate a single native instruction.

        Args:
            nac: the instruction to translate; ``nac.op`` selects the rule and
                ``nac.args`` supplies the operand strings.

        Returns:
            The resulting ``TAC``, or ``None`` (after ``Log.error``) when the
            opcode has no translation rule yet.
        """
        print(f"toTAC: nac: {nac.debug_deep()}")  # TODO: more tac builder plz
        if (nac.op == "mov"):  # mov v1:out:any, v2:in:any # mov v0, a0
            return TAC.tac_assign(AsmArg.build_arg(nac.args[0]), AsmArg.build_arg(nac.args[1]))
        if (nac.op == "lda"):  # lda v:in:any # lda v8
            return TAC.tac_assign(AsmArg(AsmTypes.ACC), AsmArg.build_arg(nac.args[0]))
        if (nac.op == "sta"):  # sta v:out:any # sta v6
            return TAC.tac_assign(AsmArg.build_arg(nac.args[0]), AsmArg(AsmTypes.ACC))
        if (nac.op == "ldobjbyname"):  # ldobjbyname imm:u16, string_id # ldobjbyname 0x0, "code"
            return TAC.tac_assign(
                AsmArg(AsmTypes.ACC),
                AsmArg(AsmTypes.ACC, "", nac.args[1]),
                log=f"arg0: {nac.args[0]} ")
        if (nac.op == "isfalse"):  # acc = ecma_op(acc, operand_0, ..., operands_n)
            return TAC.tac_assign(AsmArg(AsmTypes.ACC), AsmArg(AsmTypes.ACC), AsmArg(AsmTypes.FALSE), rop="==")
        # jnez imm:i32 # a label str in *.dis file
        # TODO: jnez needs a conditional-jump TAC builder. Until one exists it is
        # reported as unsupported; calling TAC.tac_assign() without its required
        # operands would raise TypeError.
        Log.error(f"toTAC failed, not support nac inst: {nac.debug_deep()}")
        return None

    @classmethod
    def native_code_to_TAC(cls, blocks: CodeBlocks) -> CodeBlocks:
        """Run ``toTAC`` over every instruction of every block and print the results.

        Args:
            blocks: code blocks at ``CODE_LV.NATIVE_BLOCK_SPLITED`` (asserted).

        Returns:
            The same ``blocks`` object. NOTE: the translated instructions are only
            printed for now and are not written back into the blocks (TODO).
        """
        assert blocks.level == CODE_LV.NATIVE_BLOCK_SPLITED
        for block in blocks.blocks:
            for nac_inst in block.insts:
                tac_inst = cls.toTAC(nac_inst)
                if tac_inst is None:
                    # Unsupported instruction: toTAC has already logged it.
                    continue
                print(f"toTAC: tac: {tac_inst.debug_deep()}")
        return blocks
37 changes: 33 additions & 4 deletions ohre/abcre/dis/TAC.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,39 @@
from typing import Any, Dict, Iterable, List, Tuple

from ohre.abcre.dis.NACTYPE import NACTYPE
from ohre.abcre.dis.AsmArg import AsmArg
from ohre.abcre.dis.TACTYPE import TACTYPE


class TAC():  # Three Address Code
    """A single three-address-code instruction.

    Attributes:
        optype: a ``TACTYPE`` code naming the instruction kind.
        args: operand list; for assignments the destination comes first.
        rop: right-hand-side operator, e.g. ``"+"`` for ``acc = a1 + v1``.
        log: free-form note appended to the ``debug_deep()`` output.
    """

    def __init__(self, optype=TACTYPE.UNKNOWN, args: List[AsmArg] = None, rop="", log=""):
        self.optype = optype
        # A missing operand list becomes an empty one so the debug printers,
        # which iterate over self.args, also work on a default-constructed TAC.
        self.args = args if args is not None else []
        self.rop = rop  # rhs op # e.g. acc = a1 + v1 # rop is "+"
        self.log = log

    @classmethod
    def tac_assign(cls, dst: AsmArg, src0: AsmArg, src1: AsmArg = None, rop="", log: str = ""):
        """Build an assignment instruction.

        Without ``src1`` the result is ``TACTYPE.ASSIGN`` with operands
        ``[dst, src0]``. With ``src1`` it is ``TACTYPE.ASSIGN_BI`` with operands
        ``[dst, src0, src1]`` and a non-empty ``rop`` is required (asserted).
        ``log`` is stored on the instruction in both cases.
        """
        if (src1 is None):
            return cls(TACTYPE.ASSIGN, [dst, src0], log=log)
        assert src1 is not None and rop is not None and len(rop) > 0
        return cls(TACTYPE.ASSIGN_BI, [dst, src0, src1], rop=rop, log=log)

    def __str__(self):
        return self.debug_short()

    def debug_short(self):
        """Return ``[<optype name>]`` and a tab, then each operand's short form followed by ``", "``."""
        out = f"[{TACTYPE.get_code_name(self.optype)}]\t"
        out += "".join(f"{arg.debug_short()}, " for arg in self.args)
        return out

    def debug_deep(self):
        """Return the operands in deep form, ``(rop)`` after the second one, then ``// log`` if set."""
        out = f"[{TACTYPE.get_code_name(self.optype)}]\t"
        for i, arg in enumerate(self.args):
            out += f"{arg.debug_deep()} "
            # The operator is printed between the first and second source operand.
            if (i == 1 and self.rop is not None and len(self.rop) > 0):
                out += f"({self.rop}) "
        if (self.log is not None and len(self.log) > 0):
            out += f" // {self.log}"
        return out
13 changes: 13 additions & 0 deletions ohre/abcre/dis/TACTYPE.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

from ohre.abcre.enum.BaseEnum import BaseEnum


class TACTYPE(BaseEnum):
    # Instruction kinds for three-address code; used as the ``optype`` of a TAC.
    def __init__(self):
        super().__init__()
    ASSIGN = 0  # TAC.tac_assign without src1: operands [dst, src0]
    ASSIGN_BI = 1  # TAC.tac_assign with src1: operands [dst, src0, src1] plus a rhs operator
    COND_JMP = 10 # 3 arg
    UNCN_JMP = 11 # 1 arg # unconditional
    RETURN = 20
    UNKNOWN = 99  # default optype of TAC()
4 changes: 2 additions & 2 deletions ohre/misc/Log.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,13 @@ def warn(logstr, print_flag=True):
g_log.warning(logstr)


def error(logstr, print_flag=True):
def error(logstr):
if (get_logger().getEffectiveLevel() <= logging.ERROR):
debug_print(logstr, "error")
g_log.error(logstr)


def critical(logstr, print_flag=True):
def critical(logstr):
if (get_logger().getEffectiveLevel() <= logging.CRITICAL):
debug_print(logstr, "criti")
g_log.critical(logstr)
Expand Down

0 comments on commit 18bb3b4

Please sign in to comment.