From a6535e56a5b659be0181e4d6813bd71e2da310c5 Mon Sep 17 00:00:00 2001 From: Axel Tillequin Date: Sun, 17 Apr 2016 16:53:07 +0200 Subject: [PATCH] squash merge develop branch into v2.4.5 --- README.rst | 15 ++ amoco/arch/x64/asm.py | 279 ++++++++++++++++++++++++++++++++++- amoco/arch/x64/env.py | 2 + amoco/arch/x64/formats.py | 2 +- amoco/arch/x64/spec_ia32e.py | 74 ++++------ amoco/arch/x64/spec_sse.py | 13 +- amoco/arch/x64/utils.py | 62 ++++---- amoco/arch/x86/asm.py | 47 ++++++ amoco/arch/x86/env.py | 2 + amoco/arch/x86/spec_ia32.py | 32 ++-- amoco/arch/x86/spec_sse.py | 1 + amoco/arch/x86/utils.py | 9 +- amoco/system/core.py | 38 ++++- tests/test_arch_x64.py | 99 +++++++++++++ 14 files changed, 566 insertions(+), 109 deletions(-) create mode 100644 tests/test_arch_x64.py diff --git a/README.rst b/README.rst index 55474b6..df8befd 100644 --- a/README.rst +++ b/README.rst @@ -1338,6 +1338,20 @@ Please see `LICENSE`_. Changelog ========= +- `v2.4.5`_ + + * add x86/x64 internals 'mode' selector + * add 'lab' expression for labels + * improve MemoryZone/Map with a 'grep' method + * improve MemoryZone to allow "shifting" to some address + * improve x86 AT&T formatter + * add x64 decoder tests + * fix x64 rip-relative addressing mode + * fix many x64 specs + * add x64 packed-instructions semantics + * fix various x86 SSE instructions + * fix various x86 issues (fisttp/SETcc/PUSH imm8/movq) + - `v2.4.4`_ * add some SSE instruction semantics @@ -1452,6 +1466,7 @@ Changelog .. _ply: http://www.dabeaz.com/ply/ .. _zodb: http://www.zodb.org .. _LICENSE: https://github.com/bdcht/amoco/blob/release/LICENSE +.. _v2.4.5: https://github.com/bdcht/amoco/releases/tag/v2.4.5 .. _v2.4.4: https://github.com/bdcht/amoco/releases/tag/v2.4.4 .. _v2.4.3: https://github.com/bdcht/amoco/releases/tag/v2.4.3 .. _v2.4.2: https://github.com/bdcht/amoco/releases/tag/v2.4.2 diff --git a/amoco/arch/x64/asm.py b/amoco/arch/x64/asm.py index 7b67f0e..cc40e29 100644 --- a/amoco/arch/x64/asm.py +++ b/amoco/arch/x64/asm.py @@ -90,6 +90,15 @@ def i_RET(i,fmap): def i_HLT(i,fmap): fmap[rip] = top(64) +def i_XLATB(i,fmap): + fmap[rip] = fmap[rip]+i.length + _table = bx if i.misc['opdsz']==16 else ebx + REX = i.misc['REX'] + W = 0 + if REX: W=REX[0] + if W==1: _table = rbx + fmap[al] = fmap(mem(_table+al.zeroextend(_table.size),8)) + #------------------------------------------------------------------------------ def _ins_(i,fmap,l): counter = cx if i.misc['adrsz'] else rcx @@ -391,7 +400,7 @@ def i_JMPF(i,fmap): pc = fmap[rip]+i.length #------------------------------------------------------------------------------ -def _loop_(i,fmap,cond): +def _loop_(i,fmap,fcond): pc = fmap[rip]+i.length opdsz = 16 if i.misc['opdsz'] else 64 src = i.operands[0].signextend(64) @@ -402,20 +411,21 @@ def _loop_(i,fmap,cond): W = 0 if REX: W=REX[0] if W==1: counter = rcx + cond = fcond(zf,counter) fmap[counter] = fmap(counter)-1 fmap[rip] = tst(fmap(cond), loc, pc) def i_LOOP(i,fmap): - cond = (counter!=0) - _loop_(i,fmap,cond) + fcond = lambda f,c: (c!=0) + _loop_(i,fmap,fcond) def i_LOOPE(i,fmap): - cond = zf&(counter!=0) - _loop_(i,fmap,cond) + fcond = lambda f,c : f&(c!=0) + _loop_(i,fmap,fcond) def i_LOOPNE(i,fmap): - cond = (~zf)&(counter!=0) - _loop_(i,fmap,cond) + fcond = lambda f,c : (~f)&(c!=0) + _loop_(i,fmap,fcond) #------------------------------------------------------------------------------ def i_LSL(i,fmap): @@ -1253,3 +1263,258 @@ def i_SYSRET(i,fmap): fmap[rip] = top(64) fmap[rsp] = top(64) +def i_PAND(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = fmap(i.operands[1]) + x=fmap(op1)&op2 + fmap[op1] = x + +def i_PANDN(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = fmap(i.operands[1]) + x=fmap(~op1)&op2 + fmap[op1] = x + +def i_POR(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = fmap(i.operands[1]) + x=fmap(op1)|op2 + fmap[op1] = x + +def i_PXOR(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = fmap(i.operands[1]) + x=fmap(op1)^op2 + fmap[op1] = x + +def i_MOVD(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = fmap(i.operands[1]) + fmap[op1] = op2[0:32].zeroextend(op1.size) + +def i_MOVQ(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = fmap(i.operands[1]) + fmap[op1] = op2[0:64].zeroextend(op1.size) + +def sse_MOVSD(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + if op1._is_mem: + src = fmap(op2[0:op1.size]) + elif op2._is_mem: + src = fmap(op2).zeroextend(op1.size) + fmap[op1] = src + +def i_MOVDQU(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + fmap[op1] = fmap(op2) + +def i_MOVDQA(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + fmap[op1] = fmap(op2) + +def i_MOVUPS(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + fmap[op1] = fmap(op2) + +def i_MOVAPS(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + fmap[op1] = fmap(op2) + +def i_PADDB(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + assert op1.size==op2.size + for __i in range(0,op1.size,8): + src1 = fmap(op1[__i:__i+8]) + src2 = fmap(op2[__i:__i+8]) + fmap[op1[__i:__i+8]] = src1+src2 + +def i_PSUBUSB(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + assert op1.size==op2.size + for __i in range(0,op1.size,8): + src1 = fmap(op1[__i:__i+8]) + src2 = fmap(op2[__i:__i+8]) + res = src1-src2 + fmap[op1[__i:__i+8]] = tst(src1src2,src1,src2) + +def i_PMINUB(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + assert op1.size==op2.size + for __i in range(0,op1.size,8): + src1 = fmap(op1[__i:__i+8]) + src2 = fmap(op2[__i:__i+8]) + fmap[op1[__i:__i+8]] = tst(src1>src2.value for v1 in val1] + fmap[op1] = composer(res) + +def i_PSRLD(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + src1 = fmap(op1) + src2 = fmap(op2) + val1 = (src1[i:i+32] for i in range(0,op1.size,32)) + res = [v1>>src2.value for v1 in val1] + fmap[op1] = composer(res) + +def i_PSRLQ(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + src1 = fmap(op1) + src2 = fmap(op2) + val1 = (src1[i:i+64] for i in range(0,op1.size,64)) + res = [v1>>src2.value for v1 in val1] + fmap[op1] = composer(res) + +def i_PSLLQ(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + src1 = fmap(op1) + src2 = fmap(op2) + val1 = (src1[i:i+64] for i in range(0,op1.size,64)) + res = [v1<>(order[j:j+sz]*32) ) + j+=sz + fmap[op1] = composer(dst) + +def i_PSHUFB(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + assert op1.size==op2.size + sz = 4 if op1.size==128 else 3 + src = fmap(op1) + mask = fmap(op2) + for i in range(0,op1.size,8): + srcb = src[i:i+8] + maskb = mask[i:i+8] + indx = maskb[0:sz] + if indx._is_cst: + sta,sto = indx.value*8,indx.value*8+8 + v = src[sta:sto] + src[i:i+8] = tst(maskb[7:8],cst(0,8),v) + src[sta:sto] = tst(maskb[7:8],v,srcb) + else: + src[i:i+8] = tst(maskb[7:8],cst(0,8),top(8)) + fmap[op1] = src + +def i_PINSRW(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + op3 = i.operands[2] + if op2._is_reg: op2 = op2[0:16] + src1 = fmap(op1) + src2 = fmap(op2) + if op3._is_cst: + sta,sto = op3.value*16,op3.value*16+16 + src1[sta:sto] = src2 + else: + src1 = top(src1.size) + fmap[op1] = src1 + +def i_PEXTRW(i,fmap): + fmap[rip] = fmap[rip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + op3 = i.operands[2] + src2 = fmap(op2) + if op3._is_cst: + sta,sto = op3.value*16,op3.value*16+16 + v = src2[sta:sto] + else: + v = top(16) + fmap[op1] = v.zeroextend(op1.size) diff --git a/amoco/arch/x64/env.py b/amoco/arch/x64/env.py index c70c9fd..d0e4aa9 100644 --- a/amoco/arch/x64/env.py +++ b/amoco/arch/x64/env.py @@ -108,3 +108,5 @@ def dr(num): xmmregs = [reg('xmm%d'%n, 128) for n in range(16)] ymmregs = [reg('ymm%d'%n, 256) for n in range(16)] + +internals = {'mode':64} diff --git a/amoco/arch/x64/formats.py b/amoco/arch/x64/formats.py index e415764..64ba693 100644 --- a/amoco/arch/x64/formats.py +++ b/amoco/arch/x64/formats.py @@ -24,7 +24,7 @@ def deref(op): base10 = True else: base10 = False - s += '[%s%s]'%(op.a.base,op.a.disp_to_string(base10)) + s += '[%s%s]'%(op.a.base,op.a.disp_tostring(base10)) return s def opers(i): diff --git a/amoco/arch/x64/spec_ia32e.py b/amoco/arch/x64/spec_ia32e.py index 6e5399a..6a32a67 100644 --- a/amoco/arch/x64/spec_ia32e.py +++ b/amoco/arch/x64/spec_ia32e.py @@ -43,10 +43,13 @@ def prefix_grp2(obj,_pfx): @ispec_ia32("8>[ {66} ]+", _pfx=('opdsz', 16)) def prefix_grp3(obj,_pfx): + if env.internals['mode']==16: _pfx=('opdsz',32) setpfx(obj,_pfx,2) @ispec_ia32("8>[ {67} ]+", _pfx=('adrsz', 32)) def prefix_grp4(obj,_pfx): + if env.internals['mode']==32: _pfx=('adrsz',16) + if env.internals['mode']==16: _pfx=('adrsz',32) setpfx(obj,_pfx,3) @ispec_ia32("8>[ B X R W 0010 ]+", _pfx=('REX', True)) @@ -132,8 +135,12 @@ def ia32_nooperand(obj): @ispec_ia32(" 8>[ {ae} ]", mnemonic = "SCASB", type=type_data_processing) @ispec_ia32(" 8>[ {af} ]", mnemonic = "SCASD", type=type_data_processing) def ia32_strings(obj): - if obj.mnemonic[-1]=='D' and obj.misc['opdsz']: - obj.mnemonic = obj.mnemonic[:-1]+'W' + if obj.mnemonic[-1]=='D': + W,R,X,B = getREX(obj) + if W==1: # REX superseeds 66 prefix + obj.mnemonic = obj.mnemonic[:-1]+'Q' + elif obj.misc['opdsz']==16: + obj.mnemonic = obj.mnemonic[:-1]+'W' # 1 operand #---------- @@ -254,8 +261,9 @@ def ia32_rm32(obj,Mod,RM,data): @ispec_ia32("*>[ {ff} /2 ]", mnemonic = "CALL") @ispec_ia32("*>[ {ff} /4 ]", mnemonic = "JMP") def ia32_rm64(obj,Mod,RM,data): - if not obj.misc['REX']: obj.misc['REX']=(1,0,0,0) - op1,data = getModRM(obj,Mod,RM,data) + obj.misc['opdsz'] = 64 + REX = obj.misc['REX'] or (1,0,0,0) + op1,data = getModRM(obj,Mod,RM,data,REX) obj.operands = [op1] obj.misc['absolute']=True obj.type = type_control_flow @@ -314,11 +322,9 @@ def ia32_imm_rel(obj,cc,data): @ispec_ia32("8>[ rd(3) 0 1001 ]", mnemonic = "XCHG") # 9x def ia32_xchg(obj,rd): size = obj.misc['opdsz'] or 32 - REX = obj.misc['REX'] - if REX: - W,R,X,B = REX - if W==1: size=64 - if R==1: rd = (R<<3)+rd + W,R,X,B = getREX(obj) + if W==1: size=64 + if R==1: rd = (R<<3)+rd op1 = env.getreg(0,size) op2 = env.getreg(rd,size) obj.operands = [op1, op2] @@ -369,10 +375,8 @@ def ia32_mov_adr(obj,data,_flg8,_inv): @ispec_ia32("*>[ {a9} ~data(*) ]", mnemonic = "TEST") def ia32_eax_imm(obj,data): size = immsz = obj.misc['opdsz'] or 32 - REX = obj.misc['REX'] - if REX: - W,R,X,B = REX - if W==1: size=64 + W,R,X,B = getREX(obj) + if W==1: size=64 if data.size[ {c0} /7 ]", mnemonic = "SAR") def ia32_ptr_ib(obj,Mod,RM,data): obj.misc['opdsz']=8 - REX = obj.misc['REX'] - W=0 - if REX: W=REX[0] + W,R,X,B = getREX(obj) op1,data = getModRM(obj,Mod,RM,data) if data.size<8: raise InstructionError(obj) imm = data[0:8] @@ -462,11 +464,9 @@ def ia32_mov_adr(obj,rb,ib): @ispec_ia32("*>[ rb(3) 1 1101 ~data(*) ]", mnemonic = "MOV") # b8+rd id/io def ia32_mov_adr(obj,rb,data): size = obj.misc['opdsz'] or 32 - REX = obj.misc['REX'] - if REX: - W,R,X,B = REX - if W==1: size=64 - if B==1: rb = (B<<3)+rb + W,R,X,B = getREX(obj) + if W==1: size=64 + if B==1: rb = (B<<3)+rb op1 = env.getreg(rb,size) if data.size[ {c1} /6 ]", mnemonic = "SHL") @ispec_ia32("*>[ {c1} /7 ]", mnemonic = "SAR") def ia32_rm32_imm8(obj,Mod,RM,data): - REX = obj.misc['REX'] - W=0 - if REX: W=REX[0] + W,R,X,B = getREX(obj) op1,data = getModRM(obj,Mod,RM,data) if data.size<8: raise InstructionError(obj) imm = data[0:8] @@ -626,15 +624,9 @@ def ia32_arpl(obj,Mod,REG,RM,data,_inv): @ispec_ia32("*>[ {63} /r ]", mnemonic = "MOVSXD") def ia32_movsxd(obj,Mod,REG,RM,data): - REX = obj.misc['REX'] - if REX: W,R,X,B = REX - else: W=0 op1 = getregR(obj,REG,64) - # force 32-bit wide op2 - if W==1: obj.misc['REX'] = (0,R,X,B) - op2,data = getModRM(obj,Mod,RM,data) - # restore original W - if W==1: obj.misc['REX'] = (1,R,X,B) + # force REX.W=0 for op2 decoding: + op2,data = getModRM(obj,Mod,RM,data,REX=(0,R,X,B)) obj.operands = [op1, op2] obj.type = type_data_processing @@ -761,12 +753,10 @@ def ia32_cmpxchg(obj,Mod,RM,data): op2,data = getModRM(obj,Mod,RM,data) if not op2._is_mem: raise InstructionError(obj) op2.size = 64 - REX = obj.misc['REX'] - if REX: - W=REX[0] - if W==1: - obj.mnemonic = "CMPXCHG16B" - op2.size = 128 + W,R,X,B = getREX(obj) + if W==1: + obj.mnemonic = "CMPXCHG16B" + op2.size = 128 obj.operands = [op2] obj.type = type_data_processing @@ -853,11 +843,9 @@ def ia32_ADC_eax_imm(obj,ib): @ispec_ia32("*>[ {0f}{bf} /r ]", mnemonic = "MOVSX", _flg8=False) def ia32_movx(obj,Mod,RM,REG,data,_flg8): size = obj.misc['opdsz'] or 32 - REX = obj.misc['REX'] - if REX: - W,R,X,B = REX - if W==1: size=64 - if R==1: REG = (R<<3)+REG + W,R,X,B = getREX(obj) + if W==1: size=64 + if R==1: REG = (R<<3)+REG op1 = env.getreg(REG,size) obj.misc['opdsz']=8 if _flg8 else 16 op2,data = getModRM(obj,Mod,RM,data) diff --git a/amoco/arch/x64/spec_sse.py b/amoco/arch/x64/spec_sse.py index deddae2..6dc1442 100644 --- a/amoco/arch/x64/spec_sse.py +++ b/amoco/arch/x64/spec_sse.py @@ -398,7 +398,11 @@ def sse_sd(obj,Mod,REG,RM,data): # xmm, r/m32 @ispec_ia32("*>[ {0f}{2a} /r ]", mnemonic="CVTSI2SD") def sse_sd(obj,Mod,REG,RM,data): - if not check_f2(obj,set_opdsz_64): raise InstructionError(obj) + if not check_f2(obj,set_opdsz_32): raise InstructionError(obj) + REX = obj.misc['REX'] + if REX is not None: + W,R,X,B = REX + if W==1: set_opdsz_64() op2,data = getModRM(obj,Mod,RM,data) op1 = getregR(obj,REG,128) obj.operands = [op1,op2] @@ -528,7 +532,11 @@ def sse_sd(obj,Mod,REG,RM,data): # xmm, r/m32 @ispec_ia32("*>[ {0f}{2a} /r ]", mnemonic="CVTSI2SS") def sse_sd(obj,Mod,REG,RM,data): - if not check_f3(obj,set_opdsz_64): raise InstructionError(obj) + if not check_f3(obj,set_opdsz_32): raise InstructionError(obj) + REX = obj.misc['REX'] + if REX is not None: + W,R,X,B = REX + if W==1: set_opdsz_64() op2,data = getModRM(obj,Mod,RM,data) op1 = getregR(obj,REG,128) obj.operands = [op1,op2] @@ -582,6 +590,7 @@ def sse_sd(obj,Mod,REG,RM,data): @ispec_ia32("*>[ {0f}{6a} /r ]", mnemonic="PUNPCKHDQ") @ispec_ia32("*>[ {0f}{6b} /r ]", mnemonic="PACKSSDW") @ispec_ia32("*>[ {0f}{6c} /r ]", mnemonic="PUNPCKLQDQ") +@ispec_ia32("*>[ {0f}{6d} /r ]", mnemonic="PUNPCKHQDQ") @ispec_ia32("*>[ {0f}{6f} /r ]", mnemonic="MOVDQA") @ispec_ia32("*>[ {0f}{74} /r ]", mnemonic="PCMPEQB") @ispec_ia32("*>[ {0f}{75} /r ]", mnemonic="PCMPEQW") diff --git a/amoco/arch/x64/utils.py b/amoco/arch/x64/utils.py index 2464dca..3105c09 100644 --- a/amoco/arch/x64/utils.py +++ b/amoco/arch/x64/utils.py @@ -27,48 +27,48 @@ def __init__(self,format,**kargs): f=format ispec.__init__(self,f,**kargs) -def getregR(obj,REG,size): +def getREX(obj): REX = obj.misc['REX'] if REX is None: W=R=X=B=0 else: W,R,X,B=REX + return (W,R,X,B) + +def getreg8_legacy(x): + return (env.al,env.cl,env.dl,env.bl,env.ah,env.ch,env.dh,env.bh)[x] + +# using REX.R to get ModRM 'reg' register +def getregR(obj,REG,size): + W,R,X,B = getREX(obj) return env.getreg(REG+(R<<3),size) +# using REX.R + REX.W to get ModRM 'reg' register def getregRW(obj,REG,size): - REX = obj.misc['REX'] - if REX is None: - W=R=X=B=0 - else: - W,R,X,B=REX - if W==1: size=64 + W,R,X,B = getREX(obj) + if W==1: size=64 return env.getreg(REG+(R<<3),size) +# using REX.B to get ModRM 'r/m' register def getregB(obj,REG,size): - REX = obj.misc['REX'] - if REX is None: - W=R=X=B=0 - else: - W,R,X,B=REX + W,R,X,B = getREX(obj) return env.getreg(REG+(B<<3),size) # read ModR/M + SIB values and update obj accordingly: -def getModRM(obj,Mod,RM,data): +def getModRM(obj,Mod,RM,data,REX=None): opdsz = obj.misc['opdsz'] or 32 adrsz = obj.misc['adrsz'] or 64 seg = obj.misc['segreg'] if seg is None: seg='' - REX = obj.misc['REX'] - if REX is None: - W=R=X=B=0 - else: - W,R,X,B = REX - if W==1: opdsz = 64 - # r/16/32 case: + W,R,X,B = REX or getREX(obj) + if opdsz!=8 and W==1: opdsz = 64 + # r/16/32/64 case: if Mod==0b11: op1 = env.getreg((B<<3)+RM,opdsz) + if opdsz==8 and obj.misc['REX'] is None: + op1 = getreg8_legacy(RM) return op1,data - # m/16/32 case: + # SIB cases : if adrsz!=16 and RM==0b100: # read SIB byte in data: if data.size<8: raise InstructionError(obj) @@ -93,15 +93,15 @@ def getModRM(obj,Mod,RM,data): env.di, env.bp, env.bx)[RM] - - # check [disp16/32] case: - if (b is env.rbp or b is env.r13) and Mod==0: - b=env.rip - if seg is '': seg = env.cs - Mod = 0b10 - if (b is env.bp) and Mod==0: - b=env.cst(0,adrsz) - Mod = 0b10 + # check special disp32 case (RIP-relative addressing): + if Mod==0: + if RM==0b101: + b=env.rip + if seg is '': seg = env.cs + Mod = 0b10 + elif b.ref in ('rbp','r13'): + b = env.cst(0,adrsz) + Mod = 0b10 # now read displacement bytes: if Mod==0b00: d = 0 @@ -155,6 +155,8 @@ def set_opdsz_mm(obj): obj.misc['opdsz']='mm' def set_opdsz_64(obj): obj.misc['opdsz']=64 +def set_opdsz_32(obj): + obj.misc['opdsz']=32 def check_f2(obj,f=do_nothing): if obj.misc['pfx'] and obj.misc['pfx'][0]=='repne': diff --git a/amoco/arch/x86/asm.py b/amoco/arch/x86/asm.py index 2b5194c..c5b5ba2 100644 --- a/amoco/arch/x86/asm.py +++ b/amoco/arch/x86/asm.py @@ -111,6 +111,11 @@ def i_AAM(i,fmap): fmap[zf] = _r==0 fmap[sf] = _r<0 +def i_XLATB(i,fmap): + fmap[eip] = fmap[eip]+i.length + _table = bx if i.misc['opdsz']==16 else ebx + fmap[al] = fmap(mem(_table+al.zeroextend(_table.size),8)) + #------------------------------------------------------------------------------ def i_BSWAP(i,fmap): fmap[eip] = fmap[eip]+i.length @@ -1242,6 +1247,12 @@ def i_PXOR(i,fmap): x=fmap(op1)^op2 fmap[op1] = x +def i_MOVD(i,fmap): + fmap[eip] = fmap[eip]+i.length + op1 = i.operands[0] + op2 = fmap(i.operands[1]) + fmap[op1] = op2[0:32].zeroextend(op1.size) + def i_MOVQ(i,fmap): fmap[eip] = fmap[eip]+i.length op1 = i.operands[0] @@ -1359,6 +1370,26 @@ def i_PCMPEQB(i,fmap): res = [tst(v1==v2,cst(0xff,8),cst(0,8)) for (v1,v2) in zip(val1,val2)] fmap[op1] = composer(res) +def i_PSRLW(i,fmap): + fmap[eip] = fmap[eip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + src1 = fmap(op1) + src2 = fmap(op2) + val1 = (src1[i:i+16] for i in range(0,op1.size,16)) + res = [v1>>src2.value for v1 in val1] + fmap[op1] = composer(res) + +def i_PSRLD(i,fmap): + fmap[eip] = fmap[eip]+i.length + op1 = i.operands[0] + op2 = i.operands[1] + src1 = fmap(op1) + src2 = fmap(op2) + val1 = (src1[i:i+32] for i in range(0,op1.size,32)) + res = [v1>>src2.value for v1 in val1] + fmap[op1] = composer(res) + def i_PSRLQ(i,fmap): fmap[eip] = fmap[eip]+i.length op1 = i.operands[0] @@ -1379,6 +1410,22 @@ def i_PSLLQ(i,fmap): res = [v1<>(order[j:j+sz]*32) ) + j+=sz + fmap[op1] = composer(dst) + def i_PSHUFB(i,fmap): fmap[eip] = fmap[eip]+i.length op1 = i.operands[0] diff --git a/amoco/arch/x86/env.py b/amoco/arch/x86/env.py index 38d0ea5..0731d62 100644 --- a/amoco/arch/x86/env.py +++ b/amoco/arch/x86/env.py @@ -90,3 +90,5 @@ def cr(num): # debug regs: def dr(num): return is_reg_other(reg('dr%d'%num,32)) + +internals = {'mode': 32} diff --git a/amoco/arch/x86/spec_ia32.py b/amoco/arch/x86/spec_ia32.py index 4dd2ff5..da212f8 100644 --- a/amoco/arch/x86/spec_ia32.py +++ b/amoco/arch/x86/spec_ia32.py @@ -42,10 +42,12 @@ def prefix_grp2(obj,_pfx): @ispec_ia32("8>[ {66} ]+", _pfx=('opdsz', 16)) def prefix_grp3(obj,_pfx): + if env.internals['mode']==16: _pfx=('opdsz',32) setpfx(obj,_pfx,2) @ispec_ia32("8>[ {67} ]+", _pfx=('adrsz', 16)) def prefix_grp4(obj,_pfx): + if env.internals['mode']==16: _pfx=('adrsz',32) setpfx(obj,_pfx,3) # IA32 opcodes: @@ -149,12 +151,12 @@ def ia32_imm8_signed(obj,ib): @ispec_ia32("16>[ {e1} ib(8) ]", mnemonic = "LOOPE", type=type_control_flow) @ispec_ia32("16>[ {e0} ib(8) ]", mnemonic = "LOOPNE", type=type_control_flow) def ia32_imm_rel(obj,ib): - size = obj.misc['adrsz'] or 32 + size = obj.misc['adrsz'] or env.internals['mode'] obj.operands = [env.cst(ib,8).signextend(size)] @ispec_ia32("16>[ {e3} cb(8) ]", mnemonic = "JECXZ", type=type_control_flow) def ia32_cb8(obj,cb): - size = obj.misc['adrsz'] or 32 + size = obj.misc['adrsz'] or env.internals['mode'] if size==16: obj.mnemonic = "JCXZ" obj.operands = [env.cst(cb,8).signextend(size)] @@ -166,7 +168,7 @@ def ia32_retn(obj,iw): # imm16/32: @ispec_ia32("*>[ {68} ~data(*) ]", mnemonic = "PUSH", type=type_data_processing) def ia32_imm32(obj,data): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] if data.size[ {e8} ~data(*) ]", mnemonic = "CALL", type=type_control_flow) @ispec_ia32("*>[ {e9} ~data(*) ]", mnemonic = "JMP", type=type_control_flow) def ia32_imm_rel(obj,data): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] if data.size[ {9a} ~data(*) ]", mnemonic = "CALLF") @ispec_ia32("*>[ {ea} ~data(*) ]", mnemonic = "JMPF") def ia32_far_imm(obj,data): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] if data.size[ reg(3) 0 0010 ]", mnemonic = "INC") #40 +rd @ispec_ia32("*>[ reg(3) 1 0010 ]", mnemonic = "DEC") #48 +rd def ia32_rm32(obj,reg): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] op1 = env.getreg(reg,size) obj.operands = [op1] obj.type = type_data_processing @ispec_ia32("16>[ {0f} reg(3) 1 0011 ]", mnemonic = "BSWAP") # 0f c4 +rd def ia32_bswap(obj,reg): - obj.operands = [env.getreg(reg,32)] + obj.operands = [env.getreg(reg,32)] # BSWAP in not supported for 16-bit operations obj.type = type_data_processing # implicit register: @@ -307,7 +309,7 @@ def ia32_imm_rel(obj,cc,cb): @ispec_ia32("*>[ {0f} cc(4) 0001 ~data(*) ]", mnemonic = "Jcc") # 0f 8x cw/d def ia32_imm_rel(obj,cc,data): obj.cond = CONDITION_CODES[cc] - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] if data.size[ rd(3) 0 1001 ]", mnemonic = "XCHG") # 9x def ia32_xchg(obj,rd): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] op1 = env.getreg(0,size) op2 = env.getreg(rd,size) obj.operands = [op1, op2] @@ -348,10 +350,10 @@ def ia32_al_imm8(obj,ib): @ispec_ia32("*>[ {a2} ~data(*) ]", mnemonic = "MOV", _flg8=True, _inv=True) @ispec_ia32("*>[ {a3} ~data(*) ]", mnemonic = "MOV", _flg8=False, _inv=True) def ia32_mov_adr(obj,data,_flg8,_inv): - opdsz = obj.misc['opdsz'] or 32 + opdsz = obj.misc['opdsz'] or env.internals['mode'] if _flg8: opdsz=8 op1 = env.getreg(0,opdsz) - adrsz = obj.misc['adrsz'] or 32 + adrsz = obj.misc['adrsz'] or env.internals['mode'] seg = obj.misc['segreg'] if seg is None: seg='' if data.size[ {3d} ~data(*) ]", mnemonic = "CMP") @ispec_ia32("*>[ {a9} ~data(*) ]", mnemonic = "TEST") def ia32_eax_imm(obj,data): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] if data.size[ rb(3) 1 1101 ~data(*) ]", mnemonic = "MOV") # b8+rd id def ia32_mov_adr(obj,rb,data): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] op1 = env.getreg(rb,size) if data.size[ {e5} ib(8) ]", mnemonic = "IN") @ispec_ia32("16>[ {e7} ib(8) ]", mnemonic = "OUT") def ia32_ADC_eax_imm(obj,ib): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] r = env.eax if size==32 else env.ax x = env.cst(ib,8) obj.operands = [r,x] if obj.mnemonic=='IN' else [x,r] @@ -855,7 +857,7 @@ def ia32_ADC_eax_imm(obj,ib): @ispec_ia32("*>[ {0f}{be} /r ]", mnemonic = "MOVSX", _flg8=True) @ispec_ia32("*>[ {0f}{bf} /r ]", mnemonic = "MOVSX", _flg8=False) def ia32_movx(obj,Mod,RM,REG,data,_flg8): - size = obj.misc['opdsz'] or 32 + size = obj.misc['opdsz'] or env.internals['mode'] op1 = env.getreg(REG,size) obj.misc['opdsz']=8 if _flg8 else 16 op2,data = getModRM(obj,Mod,RM,data) diff --git a/amoco/arch/x86/spec_sse.py b/amoco/arch/x86/spec_sse.py index 94031dc..5f85835 100644 --- a/amoco/arch/x86/spec_sse.py +++ b/amoco/arch/x86/spec_sse.py @@ -577,6 +577,7 @@ def sse_sd(obj,Mod,REG,RM,data): @ispec_ia32("*>[ {0f}{6a} /r ]", mnemonic="PUNPCKHDQ") @ispec_ia32("*>[ {0f}{6b} /r ]", mnemonic="PACKSSDW") @ispec_ia32("*>[ {0f}{6c} /r ]", mnemonic="PUNPCKLQDQ") +@ispec_ia32("*>[ {0f}{6d} /r ]", mnemonic="PUNPCKHQDQ") @ispec_ia32("*>[ {0f}{6f} /r ]", mnemonic="MOVDQA") @ispec_ia32("*>[ {0f}{74} /r ]", mnemonic="PCMPEQB") @ispec_ia32("*>[ {0f}{75} /r ]", mnemonic="PCMPEQW") diff --git a/amoco/arch/x86/utils.py b/amoco/arch/x86/utils.py index df64374..b606142 100644 --- a/amoco/arch/x86/utils.py +++ b/amoco/arch/x86/utils.py @@ -27,17 +27,17 @@ def __init__(self,format,**kargs): f=format ispec.__init__(self,f,**kargs) -# read ModR/M + SIB values and update obj accordingly: +# read ModR/M + SIB values and update obj: def getModRM(obj,Mod,RM,data): - opdsz = obj.misc['opdsz'] or 32 - adrsz = obj.misc['adrsz'] or 32 + opdsz = obj.misc['opdsz'] or env.internals['mode'] + adrsz = obj.misc['adrsz'] or env.internals['mode'] seg = obj.misc['segreg'] if seg is None: seg='' # r/16/32 case: if Mod==0b11: op1 = env.getreg(RM,opdsz) return op1,data - # m/16/32 case: + # 32-bit SIB cases: if adrsz==32 and RM==0b100: # read SIB byte in data: if data.size<8: raise InstructionError(obj) @@ -62,7 +62,6 @@ def getModRM(obj,Mod,RM,data): env.di, env.bp, env.bx)[RM] - # check [disp16/32] case: if (b is env.ebp or b is env.bp) and Mod==0: b=env.cst(0,adrsz) diff --git a/amoco/system/core.py b/amoco/system/core.py index 6022cd9..1285bb4 100644 --- a/amoco/system/core.py +++ b/amoco/system/core.py @@ -195,10 +195,10 @@ def read(self,vaddr,l): res = [] i = self.locate(vaddr) if i is None: - if len(self._map)==0: return [void(l*8)] + if len(self._map)==0: return [void(l*8L)] v0 = self._map[0].vaddr - if (vaddr+l)<=v0: return [void(l*8)] - res.append(void((v0-vaddr)*8)) + if (vaddr+l)<=v0: return [void(l*8L)] + res.append(void((v0-vaddr)*8L)) l = (vaddr+l)-v0 vaddr = v0 i = 0 @@ -207,14 +207,14 @@ def read(self,vaddr,l): try: data,ll = self._map[i].read(vaddr,ll) except IndexError: - res.append(void(ll*8)) - ll=0 + res.append(void(ll*8L)) + ll=0L break if data is None: vi = self.__cache[i] if vaddr < vi: l = min(vaddr+ll,vi)-vaddr - data = void(l*8) + data = void(l*8L) ll -= l i -=1 if data is not None: @@ -287,6 +287,24 @@ def restruct(self): self._map = m self.__update_cache() + def shift(self,offset): + for z in self._map: + z.vaddr += offset + self.__update_cache() + + def grep(self,pattern): + import re + g = re.compile(pattern) + res = [] + for z in self._map: + if z.data._is_raw: + off=0 + for s in g.findall(z.data.val): + off = z.data.val.index(s,off) + res.append(z.vaddr+off) + off += len(s) + return res + #------------------------------------------------------------------------------ class MemoryMap(object): __slot__ = ['_zones','perms'] @@ -343,6 +361,14 @@ def write(self,address,expr,deadzone=False): def restruct(self): for z in self._zones.itervalues(): z.restruct() + def grep(self,pattern): + res = [] + for z in self._zones.values(): + zres = z.grep(pattern) + if z.rel is not None: zres = [z.rel+r for r in zres] + res.extend(zres) + return res + #------------------------------------------------------------------------------ class CoreExec(object): __slots__ = ['bin','cpu','mmap'] diff --git a/tests/test_arch_x64.py b/tests/test_arch_x64.py new file mode 100644 index 0000000..e9bcc54 --- /dev/null +++ b/tests/test_arch_x64.py @@ -0,0 +1,99 @@ +import pytest + +from amoco.arch.x64 import cpu_x64 as cpu +from amoco.arch.x64.env import * + +# enforce Intel syntax and NullFormatter output: +cpu.configure(format='Intel') +from amoco.ui import render +render.configure(formatter='Null') + +def test_decoder_000(): + c = '\x90' + i = cpu.disassemble(c) + assert i.mnemonic=='NOP' + +def test_decoder_001(): + c = 'f\x0fo\x04%\xbc\x00`\x00' + i = cpu.disassemble(c) + assert i.mnemonic=='MOVDQA' + assert i.operands[0].ref == 'xmm0' + assert i.operands[1].a.base == 0x6000bc + +# movsx rax, al +def test_decoder_002(): + c = '\x48\x0f\xbe\xc0' + i = cpu.disassemble(c) + assert i.mnemonic=='MOVSX' + assert i.operands[0].ref == 'rax' + assert i.operands[1].ref == 'al' + +def test_decoder_003(): + c = '\x48\x8b\x04\xc5\0\0\0\0' + i = cpu.disassemble(c) + assert i.operands[1].a.base==(rax*8) + +def test_decoder_004(): + c = '\x64\x48\x8b\x04\x25\x28\0\0\0' + i = cpu.disassemble(c) + assert i.operands[1].a.base==40 + +def test_decoder_005(): + c = '\x8b\x2c\x25\x00\x00\x00\x00' + i = cpu.disassemble(c) + assert i.operands[1].a.base==0 + +def test_decoder_006(): + c = '\x80\xcc\x0c' + i = cpu.disassemble(c) + assert i.operands[0].ref == 'ah' + +def test_decoder_007(): + c = '\x40\x80\xcc\x0c' + i = cpu.disassemble(c) + assert i.operands[0].ref == 'spl' + +def test_decoder_008(): + c = '48B88877665544332211'.decode('hex') + i = cpu.disassemble(c) + assert i.operands[1]==0x1122334455667788 + +def test_decoder_009(): + c = '\xf3\x0f\x2a\xc0' + i = cpu.disassemble(c) + assert i.mnemonic=='CVTSI2SS' + assert i.operands[1].ref == 'eax' + +def test_decoder_010(): + c = '488d0c59'.decode('hex') + i = cpu.disassemble(c) + assert i.operands[1].a.base==((rbx*0x2)+rcx) + +def test_decoder_011(): + c = '41ffd7'.decode('hex') + i = cpu.disassemble(c) + assert i.mnemonic=='CALL' + assert i.operands[0].ref == 'r15' + +def test_decoder_012(): + c = '488b0d19000000'.decode('hex') + i = cpu.disassemble(c) + assert i.mnemonic=='MOV' + assert i.operands[0].ref == 'rcx' + assert i.operands[1].a.base == rip + assert i.operands[1].a.disp == 0x19 + +# mov ebx, dword ptr [rsp+0xc] +def test_decoder_013(): + c = '8b5c240c'.decode('hex') + i = cpu.disassemble(c) + assert i.mnemonic=='MOV' + assert i.operands[0].ref == 'ebx' + assert i.operands[1].size == 32 + assert i.operands[1].a.base == rsp + assert i.operands[1].a.disp == 0xc + +def test_decoder_014(): + c = '\x48\xa5' + i = cpu.disassemble(c) + assert i.mnemonic=='MOVSQ'