Skip to content

Commit

Permalink
Squashed commit of the develop branch
Browse files Browse the repository at this point in the history
    add eBPF architecture
    clean riscv system class
    merge LRGH-release commits up to cc51e9b
    add ExitProcess stub
    use pdb set_trace at entry of itercfg only
    guard unicode usage
    fix movnti semantics
    feedback for some x86 unimplemented semantics
    adjust ext symbol size
    fix logger progress and adjust default log level
    take LRGH-release commits into account
    add 'bitslicing' simplification option which decompose a logical
    expression into a bitsliced compound expression
    fix Solver 'get_mapper' method
    add getfileoffset method in PE/ELF classes
    fix Symbol table str method
    fix risc-v semantics & specs related to shifts
    fix mapper usemmap
    fix riscv semantics
    add simple dot format export of cfg object
    fix graph vertex comparison
    fix node/block cut
    fix logger progress method
    add unsigned < and >= operators
    add risc-v system interface
    add risc-v architecture
    fix unicode output
    fixing shift/rotate symbols
    change arithmetic right shift symbol
    allow unicode operators symbols
    support translating long multiply expressions to z3
  • Loading branch information
bdcht committed May 27, 2017
1 parent 9f7ab79 commit 6fba649
Show file tree
Hide file tree
Showing 54 changed files with 1,862 additions and 371 deletions.
18 changes: 18 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,23 @@ Please see `LICENSE`_.
Changelog
=========

- `v2.5.1`_

* add RISC-V architecture
* add eBPF architecture
* use unicode outputs with unicode symbols for most operators if supported
* add less-than-unsigned (ltu) and greater-or-equal-unsigned (geu) operators needed by riscv architecture
* fix simplify method for comp expressions
* improve simplify methods with a 'bitslice' optional parameter
* improve mapper memory interface
* fix smt model_to_mapper method
* add cfg 'dot' format (elementary) output
* add 'getfileoffset' method in ELF and PE classes
* remove global endianness flag
* update x86/x64 formats to be compatible with gnu as and clang
* add some x86/x64 instructions semantics (movnti, wbinvd, div, ...)
* fix some x86/x64 rare instructions specs (pmovmskb, ...)

- `v2.5.0`_

* support python3 (>=3.5)
Expand Down Expand Up @@ -219,6 +236,7 @@ Changelog
.. _ply: http://www.dabeaz.com/ply/
.. _sqlalchemy: http://www.sqlalchemy.org
.. _LICENSE: https://github.com/bdcht/amoco/blob/release/LICENSE
.. _v2.5.1: https://github.com/bdcht/amoco/releases/tag/v2.5.1
.. _v2.5.0: https://github.com/bdcht/amoco/releases/tag/v2.5.0
.. _v2.4.6: https://github.com/bdcht/amoco/releases/tag/v2.4.6
.. _v2.4.5: https://github.com/bdcht/amoco/releases/tag/v2.4.5
Expand Down
8 changes: 4 additions & 4 deletions amoco/arch/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ def __init__(self,istr):

def __repr__(self):
s = inspect.getmodule(self.spec.hook).__name__ if self.spec else ''
if self.address is not None: s += ' [%s] '%str(self.address)
s += " %s ( "%self.mnemonic
if self.address is not None: s += u' [%s] '%self.address
s += u" %s ( "%self.mnemonic
for k,v in inspect.getmembers(self):
if k in ('address','mnemonic','bytes','spec','operands','misc','formatter'): continue
if k.startswith('_') or inspect.ismethod(v): continue
s += '%s=%s '%(k,str(v))
return '<%s)>'%s
s += u'%s=%s '%(k,v)
return u'<%s)>'%s

@classmethod
def set_formatter(cls,f):
Expand Down
Empty file added amoco/arch/eBPF/__init__.py
Empty file.
17 changes: 17 additions & 0 deletions amoco/arch/eBPF/asm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-

# This code is part of Amoco
# Copyright (C) 2017 Axel Tillequin (bdcht3@gmail.com)
# published under GPLv2 license

from .env import *

def __npc(i_xxx):
def npc(ins,fmap):
fmap[pc] = fmap(pc)+ins.length
i_xxx(ins,fmap)
return npc

# i_xxx is the translation of eBPF instruction xxx.
#------------------------------------------------------------------------------

21 changes: 21 additions & 0 deletions amoco/arch/eBPF/cpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-

from amoco.arch.eBPF.asm import *
# expose "microarchitecture" (instructions semantics)
uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().items()))

#import specifications:
from amoco.arch.core import instruction, disassembler
instruction_eBPF = type('instruction_eBPF',(instruction,),{})
instruction_eBPF.set_uarch(uarch)

from amoco.arch.eBPF.formats import eBPF_full
instruction_eBPF.set_formatter(eBPF_full)

#define disassembler:
from amoco.arch.eBPF import spec

disassemble = disassembler([spec],iclass=instruction_eBPF)

def PC():
return pc
32 changes: 32 additions & 0 deletions amoco/arch/eBPF/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-

# This code is part of Amoco
# Copyright (C) 2017 Axel Tillequin (bdcht3@gmail.com)
# published under GPLv2 license

# import expressions:
from amoco.cas.expressions import *

#reference documentation:
# https://www.kernel.org/doc/Documentation/networking/filter.txt

r0 = reg('r0', 64) #return value from in-kernel function, exit value
r1 = reg('r1', 64) #arguments for eBPF prog to in-kernel functions
r2 = reg('r2', 64)
r3 = reg('r3', 64)
r4 = reg('r4', 64)
r5 = reg('r5', 64)
r6 = reg('r6', 64) #callee saved regs preserved by in-kernel functions
r7 = reg('r7', 64)
r8 = reg('r8', 64)
r9 = reg('r9', 64)

r10 = reg('r10',64) #read-only frame pointer to access stack
is_reg_stack(r10)

R = [r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10]
E = [slc(R[i],0,32,'e%d'%i) for i in range(10)]

pc = reg('pc',64)
is_reg_pc(pc)

49 changes: 49 additions & 0 deletions amoco/arch/eBPF/formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-

from .env import *
from amoco.cas.expressions import regtype
from amoco.arch.core import Formatter,Token

def mnemo(i):
mn = i.mnemonic.lower()
return [(Token.Mnemonic,'{: <12}'.format(mn))]

def deref(opd):
return '[%s+%d]'%(opd.a.base,opd.a.disp)

def opers(i):
s = []
for op in i.operands:
if op._is_mem:
s.append((Token.Memory,deref(op)))
elif op._is_cst:
if i.misc['imm_ref'] is not None:
s.append((Token.Address,u'%s'%(i.misc['imm_ref'])))
elif op.sf:
s.append((Token.Constant,'%+d'%op.value))
else:
s.append((Token.Constant,op.__str__()))
elif op._is_reg:
s.append((Token.Register,op.__str__()))
s.append((Token.Literal,', '))
if len(s)>0: s.pop()
return s

def opers_adr(i):
s = opers(i)
if i.address is None:
s[-1] = (Token.Address,u'.%+d'%i.operands[-1])
else:
imm_ref = i.address+i.length+(i.operands[-1]*8)
s[-1] = (Token.Address,u'#%s'%(imm_ref))
return s

format_default = (mnemo,opers)

eBPF_full_formats = {
'ebpf_jmp_': (mnemo,opers_adr),
}

eBPF_full = Formatter(eBPF_full_formats)
eBPF_full.default = format_default

154 changes: 154 additions & 0 deletions amoco/arch/eBPF/spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# -*- coding: utf-8 -*-

# This code is part of Amoco
# Copyright (C) 2017 Axel Tillequin (bdcht3@gmail.com)
# published under GPLv2 license

# spec_xxx files are providers for instruction objects.
# These objects are wrapped and created by disasm.py.

from amoco.arch.eBPF import env

from amoco.arch.core import *

#-------------------------------------------------------
# instruction eBPF decoders
# refs:
# + www.kernel.org/doc/Documentation/networking/filter.txt
# + linux/v4.11.3/source/kernel/bpf/*
#-------------------------------------------------------

ISPECS = []

# ALU_32 instructions:
@ispec("64>[ 001 s 0000 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="add")
@ispec("64>[ 001 s 1000 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="sub")
@ispec("64>[ 001 s 0100 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="mul")
@ispec("64>[ 001 s 1100 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="div")
@ispec("64>[ 001 s 0010 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="or")
@ispec("64>[ 001 s 1010 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="and")
@ispec("64>[ 001 s 0110 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="lsh")
@ispec("64>[ 001 s 1110 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="rsh")
@ispec("64>[ 001 s 0001 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="neg")
@ispec("64>[ 001 s 1001 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="mod")
@ispec("64>[ 001 s 0101 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="xor")
@ispec("64>[ 001 s 1101 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="mov")
@ispec("64>[ 001 s 0011 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="arsh")
@ispec("64>[ 001 s 1011 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="end")
def ebpf_alu_(obj,s,dreg,sreg,off,imm):
dst = env.E[dreg]
src = env.cst(imm.int(-1),32) if s==0 else env.E[sreg]
src.sf = True
if obj.mnemonic in ('or','and','xor','neg','end'):
src.sf = False
obj.operands = [dst,src]
obj.type = type_data_processing

# ALU_64 instructions:
@ispec("64>[ 111 s 0000 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="add")
@ispec("64>[ 111 s 1000 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="sub")
@ispec("64>[ 111 s 0100 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="mul")
@ispec("64>[ 111 s 1100 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="div")
@ispec("64>[ 111 s 0010 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="or")
@ispec("64>[ 111 s 1010 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="and")
@ispec("64>[ 111 s 0110 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="lsh")
@ispec("64>[ 111 s 1110 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="rsh")
@ispec("64>[ 111 s 0001 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="neg")
@ispec("64>[ 111 s 1001 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="mod")
@ispec("64>[ 111 s 0101 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="xor")
@ispec("64>[ 111 s 1101 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="mov")
@ispec("64>[ 111 s 0011 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="arsh")
@ispec("64>[ 111 s 1011 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="end")
def ebpf_alu_(obj,s,dreg,sreg,off,imm):
dst = env.R[dreg]
src = env.cst(imm.int(-1),32).zeroextend(64) if s==0 else env.R[sreg]
src.sf = True
if obj.mnemonic in ('or','and','xor','neg','end'):
src.sf = False
obj.operands = [dst,src]
obj.type = type_data_processing

@ispec("64>[ 101 s 0000 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="ja")
@ispec("64>[ 101 s 1000 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="jeq")
@ispec("64>[ 101 s 0100 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="jgt")
@ispec("64>[ 101 s 1100 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="jge")
@ispec("64>[ 101 s 0010 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="jset")
@ispec("64>[ 101 s 1010 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="jne")
@ispec("64>[ 101 s 0110 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="jsgt")
@ispec("64>[ 101 s 1110 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="jsge")
def ebpf_jmp_(obj,s,dreg,sreg,off,imm):
dst = env.R[dreg]
src = env.cst(imm.int(-1),64) if s==0 else env.R[sreg]
offset = env.cst(off.int(-1),64)
obj.operands = [dst,src,offset]
obj.type = type_control_flow

@ispec("64>[ 101 s 0001 dreg(4) sreg(4) ~off(16) imm(32) ]", mnemonic="call")
def ebpf_call_(obj,s,dreg,sreg,off,imm):
obj.operands = [env.cst(imm,32)]
obj.type = type_other

@ispec("64>[ 101 s 1001 dreg(4) sreg(4) ~off(16) imm(32) ]", mnemonic="exit")
def ebpf_exit_(obj,s,dreg,sreg,off,imm):
obj.operands = []
obj.type = type_control_flow

# MEM LOAD from reg instructions:
@ispec("64>[ 100 sz(2) 110 dreg(4) sreg(4) ~off(16) imm(32) ]", mnemonic="ldx")
def ebpf_ldx_(obj,sz,dreg,sreg,off,imm):
size = {0:32,1:16,2:8,3:64}[sz]
if imm!=0: raise InstructionError(obj)
dst = env.R[dreg]
src = env.R[sreg]
src = env.mem(src+off.int(-1),size)
obj.operands = [dst,src]
obj.mnemonic += {8:'b',16:'h',32:'w',64:'dw'}[size]
obj.type = type_data_processing

# MEM STORE immediate or reg instructions:
@ispec("64>[ 010 sz(2) 110 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="st")
@ispec("64>[ 110 sz(2) 110 dreg(4) sreg(4) ~off(16) ~imm(32) ]", mnemonic="stx")
def ebpf_st_(obj,sz,dreg,sreg,off,imm):
size = {0:32,1:16,2:8,3:64}[sz]
dst = env.mem(env.R[dreg]+off.int(-1),size)
if obj.mnemonic=='stx':
src = env.R[sreg]
if imm!=0: raise InstructionError(obj)
else:
src = env.cst(imm.int(-1),32).zeroextend(64)
src = src[0:size]
obj.operands = [dst,src]
obj.mnemonic += {8:'b',16:'h',32:'w',64:'dw'}[size]
obj.type = type_data_processing

# XADD instructions:
@ispec("64>[ 110 sz(2) 011 dreg(4) sreg(4) ~off(16) imm(32) ]", mnemonic="xadd")
def ebpf_xadd_(obj,sz,dreg,sreg,off,imm):
size = {0:32,1:16,2:8,3:64}[sz]
if (size < 32) or imm!=0: raise InstructionError(obj)
dst = env.mem(env.R[dreg]+off.int(-1),size)
src = env.R[sreg][0:size]
obj.operands = [dst,src]
obj.mnemonic += {8:'b',16:'h',32:'w',64:'dw'}[size]
obj.type = type_data_processing

# IMM LOAD instructions
@ispec("128>[ 000 11 000 dreg(4) sreg(4) off(16) imm(32) unused(32) imm2(32) ]", mnemonic="lddw")
def ebpf_ld64_(obj,dreg,sreg,off,imm,unused,imm2):
dst = env.R[dreg]
src = env.cst(imm|(imm2<<32),64)
obj.operands = [dst,src]
obj.type = type_data_processing

# ABS/IND LOAD instructions:
@ispec("64>[ 000 sz(2) 100 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="ld", _abs=True)
@ispec("64>[ 000 sz(2) 010 dreg(4) sreg(4) off(16) ~imm(32) ]", mnemonic="ld", _abs=False)
def ebpf_ld_(obj,sz,dreg,sreg,off,imm,_abs):
size = {0:32,1:16,2:8,3:64}[sz]
dst = env.R[0]
adr = env.reg('#skb',64)
if not _abs: adr += env.R[sreg]
src = env.mem(adr,disp=imm.int(-1))
obj.operands = [dst,src]
obj.mnemonic += {8:'b',16:'h',32:'w',64:'dw'}[size]
obj.type = type_data_processing
2 changes: 1 addition & 1 deletion amoco/arch/msp430/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
logger = Log(__name__)
#logger.level = 10

from amoco.arch.core import instruction
from amoco.arch.msp430.cpu import instruction_msp430 as instruction
from amoco.arch.msp430 import env

#------------------------------------------------------------------------------
Expand Down
Empty file added amoco/arch/riscv/__init__.py
Empty file.
Loading

0 comments on commit 6fba649

Please sign in to comment.