diff --git a/include/remill/Arch/Name.h b/include/remill/Arch/Name.h index 3df3ebe5e..61ca68704 100644 --- a/include/remill/Arch/Name.h +++ b/include/remill/Arch/Name.h @@ -81,6 +81,7 @@ enum ArchName : uint32_t { kArchAMD64_AVX, kArchAMD64_AVX512, + kArchThumb2LittleEndian, kArchAArch32LittleEndian, kArchAArch64LittleEndian, diff --git a/lib/Arch/AArch32/Arch.cpp b/lib/Arch/AArch32/Arch.cpp index e58d9ee07..ac41a2e58 100644 --- a/lib/Arch/AArch32/Arch.cpp +++ b/lib/Arch/AArch32/Arch.cpp @@ -55,11 +55,23 @@ AArch32Arch::~AArch32Arch(void) {} // TODO(pag): Eventually handle Thumb2 and unaligned addresses. uint64_t AArch32Arch::MinInstructionAlign(void) const { - return 4; + switch (arch_name) { + case kArchAArch32LittleEndian: return 4; + case kArchThumb2LittleEndian: return 2; + default: + LOG(FATAL) << "Cannot get minimum instruction alignment for non-aarch32 " + "architecture " << GetArchName(arch_name); + } } uint64_t AArch32Arch::MinInstructionSize(void) const { - return 4; + switch (arch_name) { + case kArchAArch32LittleEndian: return 4; + case kArchThumb2LittleEndian: return 2; + default: + LOG(FATAL) << "Cannot get minimum instruction alignment for non-aarch32 " + "architecture " << GetArchName(arch_name); + } } // Maximum number of bytes in an instruction for this particular architecture. @@ -77,6 +89,7 @@ llvm::Triple AArch32Arch::Triple(void) const { auto triple = BasicTriple(); switch (arch_name) { case kArchAArch32LittleEndian: triple.setArch(llvm::Triple::arm); break; + case kArchThumb2LittleEndian: triple.setArch(llvm::Triple::thumb); break; default: LOG(FATAL) << "Cannot get triple for non-aarch32 architecture " << GetArchName(arch_name); @@ -159,6 +172,7 @@ void AArch32Arch::PopulateRegisterTable(void) const { REG(C, sr.c, u8); REG(Z, sr.z, u8); REG(V, sr.v, u8); + REG(T, sr.t, u8); } diff --git a/lib/Arch/AArch32/CMakeLists.txt b/lib/Arch/AArch32/CMakeLists.txt index 293aff642..796d63892 100644 --- a/lib/Arch/AArch32/CMakeLists.txt +++ b/lib/Arch/AArch32/CMakeLists.txt @@ -27,7 +27,8 @@ add_library(remill_arch_aarch32 STATIC Arch.cpp Decode.cpp -# Decode.h + DecodeThumb2.cpp + Decode.h # Extract.cpp ) diff --git a/lib/Arch/AArch32/Decode.cpp b/lib/Arch/AArch32/Decode.cpp index ad2e786c2..95d77507b 100644 --- a/lib/Arch/AArch32/Decode.cpp +++ b/lib/Arch/AArch32/Decode.cpp @@ -19,11 +19,13 @@ #include #include "Arch.h" +#include "Decode.h" #include "remill/BC/ABI.h" +#include "remill/Arch/Name.h" namespace remill { -namespace { +namespace aarch32 { // Integer Data Processing (three register, register shift) union IntDataProcessingRRRR { @@ -562,17 +564,8 @@ union SpecialRegsAndHints { static_assert(sizeof(SpecialRegsAndHints) == 4, " "); static constexpr auto kAddressSize = 32u; -static constexpr auto kPCRegNum = 15u; -static constexpr auto kLRRegNum = 14u; -static const char *const kIntRegName[] = { - "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", - "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"}; - -typedef bool(TryDecode)(Instruction &, uint32_t); -typedef std::optional(InstEval)(uint32_t, uint32_t); - -static void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, +void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, Operand::Action action) { Operand::Register reg; reg.size = size; @@ -581,7 +574,7 @@ static void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, op.action = action; } -static void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size, +void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size, Operand::Action action) { Operand::Register reg; reg.size = size; @@ -601,8 +594,8 @@ static void AddExprOp(Instruction &inst, OperandExpression *op_expr, op.action = action; } -static void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32, - bool is_signed = false) { +void AddImmOp(Instruction &inst, uint64_t value, unsigned size, + bool is_signed) { Operand::Immediate imm; imm.val = value; imm.is_signed = is_signed; @@ -611,9 +604,9 @@ static void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32, op.size = size; } -static void AddAddrRegOp(Instruction &inst, const char *reg_name, - unsigned mem_size, Operand::Action mem_action, - unsigned disp, unsigned scale = 0) { +void AddAddrRegOp(Instruction &inst, const char *reg_name, unsigned mem_size, + Operand::Action mem_action, + unsigned disp, unsigned scale) { Operand::Address addr; addr.address_size = 32; addr.base_reg.name = reg_name; @@ -910,7 +903,7 @@ static void AddShiftImmCarryOperand(Instruction &inst, uint32_t reg_num, // (shift_t, shift_n) = DecodeImmShift(type, imm5); // (shifted, carry) = Shift_C(R[m], shift_t, shift_n, PSTATE.C); // See an instruction in Integer Data Processing (three register, immediate shift) set for an example -static void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num, +void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num, uint32_t shift_type, uint32_t shift_size, bool carry_out, bool can_shift_right_by_32) { auto is_rrx = false; @@ -1193,7 +1186,7 @@ static bool EvalPCDest(Instruction &inst, const bool s, const unsigned int rd, auto src2 = EvalOperand(inst, inst.operands[4], uses_linkreg); AddAddrRegOp(inst, kNextPCVariableName.data(), kAddressSize, - Operand::kActionWrite, 0); + Operand::kActionWrite, 0u); if (uses_linkreg) { @@ -3556,14 +3549,14 @@ static TryDecode *TryDataProcessingAndMisc(uint32_t bits) { // This is the top level of the instruction encoding schema for AArch32. // Instructions are grouped into subsets based on this the top level and then // into smaller sets. -// cond op0 op1 +// cond op0 op1 // != 1111 00x Data-processing and miscellaneous instructions // != 1111 010 Load/Store Word, Unsigned Byte (immediate, literal) // != 1111 011 0 Load/Store Word, Unsigned Byte (register) // != 1111 011 1 Media instructions -// 10x Branch, branch with link, and block data transfer -// 11x System register access, Advanced SIMD, floating-point, and Supervisor call -// 1111 0xx Unconditional instructions +// 10x Branch, branch with link, and block data transfer +// 11x System register access, Advanced SIMD, floating-point, and Supervisor call +// 1111 0xx Unconditional instructions static TryDecode *TryDecodeTopLevelEncodings(uint32_t bits) { const TopLevelEncodings enc = {bits}; @@ -3628,7 +3621,7 @@ static uint32_t BytesToBits(const uint8_t *bytes) { bits = (bits << 8) | static_cast(bytes[0]); return bits; } -} // namespace +} // namespace aarch32 // Decode an instruction bool AArch32Arch::DecodeInstruction(uint64_t address, @@ -3642,7 +3635,7 @@ bool AArch32Arch::DecodeInstruction(uint64_t address, inst.has_branch_taken_delay_slot = false; inst.has_branch_not_taken_delay_slot = false; inst.arch_name = arch_name; - inst.sub_arch_name = arch_name; // TODO(pag): Thumb. + inst.sub_arch_name = arch_name; inst.arch = this; inst.category = Instruction::kCategoryInvalid; inst.operands.clear(); @@ -3662,9 +3655,13 @@ bool AArch32Arch::DecodeInstruction(uint64_t address, } const auto bytes = reinterpret_cast(inst.bytes.data()); - const auto bits = BytesToBits(bytes); + const auto bits = aarch32::BytesToBits(bytes); + + if (arch_name == kArchThumb2LittleEndian) { + return aarch32::DecodeThumb2Instruction(inst, bits); + } - auto decoder = TryDecodeTopLevelEncodings(bits); + auto decoder = aarch32::TryDecodeTopLevelEncodings(bits); if (!decoder) { LOG(ERROR) << "unhandled bits " << std::hex << bits << std::dec; return false; @@ -3672,7 +3669,7 @@ bool AArch32Arch::DecodeInstruction(uint64_t address, auto ret = decoder(inst, bits); - // LOG(ERROR) << inst.Serialize(); + LOG(ERROR) << inst.Serialize(); return ret; } diff --git a/lib/Arch/AArch32/Decode.h b/lib/Arch/AArch32/Decode.h new file mode 100644 index 000000000..082b6eab9 --- /dev/null +++ b/lib/Arch/AArch32/Decode.h @@ -0,0 +1,55 @@ +/* + * Decode.h + * + * Created on: Feb 15, 2022 + * Author: sonyaschriner + */ + +#pragma once + +#include + +namespace remill { + +class Instruction; + +namespace aarch32 { + +bool DecodeThumb2Instruction(Instruction &inst, uint32_t bits); + +typedef bool(TryDecode)(Instruction &, uint32_t); +typedef bool(TryDecode16)(Instruction &, uint16_t); + +static constexpr auto kPCRegNum = 15u; +static constexpr auto kLRRegNum = 14u; +static constexpr auto kSPRegNum = 13u; + +static const char *const kIntRegName[] = { + "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"}; + +typedef std::optional(InstEval)(uint32_t, uint32_t); + +//bool DecodeCondition(Instruction &inst, uint32_t cond); + +void AddIntRegOp(Instruction &inst, unsigned index, unsigned size, + Operand::Action action); + +void AddIntRegOp(Instruction &inst, const char *reg_name, unsigned size, + Operand::Action action); + +void AddAddrRegOp(Instruction &inst, const char *reg_name, unsigned mem_size, + Operand::Action mem_action, + unsigned disp, unsigned scale = 0); + +void AddImmOp(Instruction &inst, uint64_t value, unsigned size = 32, + bool is_signed = false); + +void AddShiftRegImmOperand(Instruction &inst, uint32_t reg_num, + uint32_t shift_type, uint32_t shift_size, + bool carry_out, bool can_shift_right_by_32); + + +} +} + diff --git a/lib/Arch/AArch32/DecodeThumb2.cpp b/lib/Arch/AArch32/DecodeThumb2.cpp new file mode 100644 index 000000000..4117b5683 --- /dev/null +++ b/lib/Arch/AArch32/DecodeThumb2.cpp @@ -0,0 +1,747 @@ +/* + * Copyright (c) 2022 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include "Arch.h" +#include "Decode.h" +#include "remill/BC/ABI.h" +#include "remill/Arch/Name.h" + +namespace remill { + +namespace aarch32 { + +// Add, subtract (three low registers) +union AddSub3LowReg16 { + uint16_t flat; + struct { + uint16_t Rd : 3; + uint16_t Rn : 3; + uint16_t Rm : 3; + uint16_t S : 1; + uint16_t _000110 : 6; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AddSub3LowReg16) == 2, " "); + +// Add, subtract (two low registers and immediate) +union AddSub2LowRegImm16 { + uint16_t flat; + struct { + uint16_t Rd : 3; + uint16_t Rn : 3; + uint16_t imm3 : 3; + uint16_t S : 1; + uint16_t _000111 : 6; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AddSub2LowRegImm16) == 2, " "); + +// Add, subtract, compare, move (one low register and immediate) +union AddSubComp1LowRegImm16 { + uint16_t flat; + struct { + uint16_t imm8 : 8; + uint16_t Rd : 3; + uint16_t op : 2; + uint16_t _001 : 3; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AddSubComp1LowRegImm16) == 2, " "); + +// Adjust SP (immediate) +union AdjustSPImm16 { + uint16_t flat; + struct { + uint16_t imm7 : 7; + uint16_t S : 1; + uint16_t _10110000 : 8; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AdjustSPImm16) == 2, " "); + +// MOV, MOVS (register) — T2 +union MOVrT2_16 { + uint16_t flat; + struct { + uint16_t Rd : 3; + uint16_t Rm : 3; + uint16_t imm5 : 5; + uint16_t op : 2; + uint16_t _000 : 3; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(MOVrT2_16) == 2, " "); + +// Load/store word/byte (immediate offset) +union LoadStoreWordByteImm16 { + uint16_t flat; + struct { + uint16_t Rt : 3; + uint16_t Rn : 3; + uint16_t imm5 : 5; + uint16_t L : 1; + uint16_t B : 1; + uint16_t _011 : 3; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(LoadStoreWordByteImm16) == 2, " "); + +// Load/store (SP-relative) +union LoadStoreSPRelative16 { + uint16_t flat; + struct { + uint16_t imm8 : 8; + uint16_t Rt : 3; + uint16_t L : 1; + uint16_t _1001 : 4; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(LoadStoreSPRelative16) == 2, " "); + +// Add PC/SP (immediate) +union AddPCSPImm16 { + uint16_t flat; + struct { + uint16_t imm8 : 8; + uint16_t Rd : 3; + uint16_t SP : 1; + uint16_t _1010 : 4; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(AddPCSPImm16) == 2, " "); + +// Miscellaneous 16-bit instructions +union Misc16 { + uint16_t flat; + struct { + uint16_t op3 : 4; + uint16_t _b4 : 1; + uint16_t op2 : 1; + uint16_t op1 : 2; + uint16_t op0 : 4; + uint16_t _1011 : 4; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(Misc16) == 2, " "); + +// B — T1 +union B_T1_16 { + uint16_t flat; + struct { + uint16_t imm8 : 8; + uint16_t cond : 4; + uint16_t _1101 : 4; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(B_T1_16) == 2, " "); + +// B — T2 +union B_T2_16 { + uint16_t flat; + struct { + uint16_t imm11 : 11; + uint16_t _11100 : 5; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(B_T2_16) == 2, " "); + +// Shift (immediate), add, subtract, move, and compare +union ShiftImmAddSubMoveComp16 { + uint16_t flat; + struct { + uint16_t _9_to_0 : 10; + uint16_t op2 : 1; + uint16_t op1 : 2; + uint16_t op0 : 1; + uint16_t _00 : 2; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(ShiftImmAddSubMoveComp16) == 2, " "); + +// Load/Store Multiple +union LoadStoreMult32 { + uint32_t flat; + struct { + uint32_t register_list : 13; + uint32_t _0_b13 : 1; + uint32_t M : 1; + uint32_t P : 1; + uint32_t Rn : 4; + uint32_t L : 1; + uint32_t W : 1; + uint32_t _0_b22 : 1; + uint32_t opc : 2; + uint32_t _1110100 : 7; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(LoadStoreMult32) == 4, " "); + +// BL, BLX (immediate) — T1 +union BLT1_32 { + uint32_t flat; + struct { + uint32_t imm11 : 11; + uint32_t J2 : 1; + uint32_t _1 : 1; + uint32_t J1 : 1; + uint32_t _11 : 2; + uint32_t imm10 : 10; + uint32_t S : 1; + uint32_t _11110 : 5; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(BLT1_32) == 4, " "); + +// BL, BLX (immediate) — T2 +union BLXT2_32 { + uint32_t flat; + struct { + uint32_t H : 1; + uint32_t imm10L : 10; + uint32_t J2 : 1; + uint32_t _0 : 1; + uint32_t J1 : 1; + uint32_t _11 : 2; + uint32_t imm10H : 10; + uint32_t S : 1; + uint32_t _11110 : 5; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(BLXT2_32) == 4, " "); + +// Branches and miscellaneous control +union BranchesMiscControl32 { + uint32_t flat; + struct { + uint32_t _11110 : 5; + uint32_t op0 : 1; + uint32_t op1 : 4; + uint32_t op2 : 2; + uint32_t _19_to_16 : 4; + uint32_t _1 : 1; + uint32_t op3 : 3; + uint32_t _b11 : 1; + uint32_t op4 : 3; + uint32_t _7_to_6 : 2; + uint32_t op5 : 1; + uint32_t _4_to_0 : 5; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(BranchesMiscControl32) == 4, " "); + +// 32-bit instructions +union Top32bit { + uint32_t flat; + struct { + uint32_t _14_to_0 : 15; + uint32_t op3 : 1; + uint32_t _19_to_16 : 4; + uint32_t op1 : 5; + uint32_t op0 : 4; + uint32_t _111 : 3; + } __attribute__((packed)); +} __attribute__((packed)); +static_assert(sizeof(Top32bit) == 4, " "); + +// ------------- 16 Bit Instructions ------------- + +static const char *const kIdpNamesAddSubLowReg[] = { + [0b0] = "ADDL_T2", [0b1] = "SUBL_T2" +}; + +// S +// 0 ADD, ADDS (register) +// 1 SUB, SUBS (register) +// Add, subtract (three low registers) +static bool TryDecode16AddSub3LowReg(Instruction &inst, uint16_t bits) { + + // TODO(sonya) ADDS, SUBS - Decide how to handle InITBlock() + + const AddSub3LowReg16 enc = {bits}; + inst.category = Instruction::kCategoryNormal; + inst.function = kIdpNamesAddSubLowReg[enc.S]; + + + // Unconditionally executed + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionWrite); + AddIntRegOp(inst, uint32_t(enc.Rn), 32u, Operand::kActionRead); + AddIntRegOp(inst, uint32_t(enc.Rm), 32u, Operand::kActionRead); + + return true; + +} + +// S +// 0 ADD, ADDS (immediate) +// 1 SUB, SUBS (immediate) +// Add, subtract (two low registers and immediate) +static bool TryDecode16AddSub2LowRegImm(Instruction &inst, uint16_t bits) { + + // TODO(sonya) ADDS, SUBS - Decide how to handle InITBlock() + + const AddSub2LowRegImm16 enc = {bits}; + inst.category = Instruction::kCategoryNormal; + inst.function = kIdpNamesAddSubLowReg[enc.S]; + + // Unconditionally executed + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionWrite); + AddIntRegOp(inst, uint32_t(enc.Rn), 32u, Operand::kActionRead); + AddImmOp(inst, uint32_t(enc.imm3)); + + return true; + +} + +static const char *const kIdpAddSubComp1LowRegImm[] = { + [0b00] = "MOVL_T2", [0b01] = "CMPL_T2", + [0b10] = "ADDL_T2", [0b11] = "SUBL_T2" +}; + + +// op +// 00 MOV, MOVS (immediate) +// 01 CMP (immediate) +// 10 ADD, ADDS (immediate) +// 11 SUB, SUBS (immediate) +// Add, subtract, compare, move (one low register and immediate) +static bool TryDecode16AddSubComp1LowRegImm(Instruction &inst, uint16_t bits) { + + // TODO(sonya): setflags = !InITBlock() + + const AddSubComp1LowRegImm16 enc = {bits}; + inst.category = Instruction::kCategoryNormal; + inst.function = kIdpAddSubComp1LowRegImm[enc.op]; + + // Unconditionally executed + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionWrite); + if (enc.op) { + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionRead); + } + AddImmOp(inst, uint32_t(enc.imm8)); + + return true; + +} + +// MOV, MOVS (register) — T2 +static bool TryDecode16MOVrT2(Instruction &inst, uint16_t bits) { + + const MOVrT2_16 enc = {bits}; + inst.category = Instruction::kCategoryNormal; + inst.function = "MOVL_T2"; + + // TODO(sonya): setflags = !InITBlock() + // if op == '00' && imm5 == '00000' && InITBlock() then UNPREDICTABLE; + + AddIntRegOp(inst, uint32_t(enc.Rd), 32u, Operand::kActionWrite); + + // (shift_t, shift_n) = DecodeImmShift(op, imm5); + AddShiftRegImmOperand(inst, uint32_t(enc.Rm), uint32_t(enc.op), + uint32_t(enc.imm5), false, false); + + return true; + +} + +static const char *const kIdpLoadStoreWordByte[] = { + [0b00] = "STR_T2", [0b01] = "LDR_T2", + [0b10] = "STRB_T2", [0b11] = "LDRB_T2" +}; + +// B L +// 0 0 STR (immediate) +// 0 1 LDR (immediate) +// 1 0 STRB (immediate) +// 1 1 LDRB (immediate) +// Load/store word/byte (immediate offset) +template +static bool TryDecode16LoadStoreWordByteImm(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryNormal; + const LoadStoreWordByteImm16 enc = {bits}; + inst.function = kIdpLoadStoreWordByte[(enc.B << 1) | enc.L]; + + AddAddrRegOp(inst, kIntRegName[enc.Rt], 32u, kRegAction, 0u); + AddAddrRegOp(inst, kIntRegName[enc.Rn], 32u, kMemAction, enc.imm5 << 2); + + return true; +} + +static TryDecode16 *kDecode16LoadStoreWordByteImm[] = { + [0b00] = TryDecode16LoadStoreWordByteImm, + [0b01] = TryDecode16LoadStoreWordByteImm, + [0b10] = TryDecode16LoadStoreWordByteImm, + [0b11] = TryDecode16LoadStoreWordByteImm +}; + +// L +// 0 STR (immediate) +// 1 LDR (immediate) +// Load/store (SP-relative) TODO(sonya) +static bool TryDecode16LoadStoreSPRelative(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + + return false; +// const LoadStoreSPRelative16 enc = {bits}; +} + +// SP +// 0 ADR +// 1 ADD, ADDS (SP plus immediate) +// Add PC/SP (immediate) +static bool TryDecode16AddPCSP(Instruction &inst, uint16_t bits) { + + const AddPCSPImm16 enc = {bits}; + inst.function = enc.SP ? "ADDL_T2" : "ADR"; + + // TODO(sonya): ADR + + if (enc.SP) { + inst.category = Instruction::kCategoryNormal; + + AddIntRegOp(inst, enc.Rd, 32u, Operand::kActionWrite); + AddIntRegOp(inst, kSPRegNum, 32u, Operand::kActionRead); + AddImmOp(inst, uint32_t(enc.imm8 << 2)); + + return true; + } + + inst.category = Instruction::kCategoryError; + return false; +} + +// CBNZ, CBZ TODO(sonya) +static bool TryDecode16CBZ(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; +} + + +// Adjust SP (immediate) +static bool TryDecode16AdjustSPImm(Instruction &inst, uint16_t bits) { + + const AdjustSPImm16 enc = {bits}; + + // TODO(sonya): setflags = !InITBlock() + + inst.category = Instruction::kCategoryNormal; + inst.function = kIdpNamesAddSubLowReg[enc.S]; + + AddIntRegOp(inst, kSPRegNum, 32u, Operand::kActionWrite); + AddIntRegOp(inst, kSPRegNum, 32u, Operand::kActionRead); + AddImmOp(inst, uint32_t(enc.imm7 << 2)); + + return true; +} + +// B — T1 encoding TODO(sonya) +static bool TryDecode16B_T1(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; +// const B_T1_16 enc = {bits}; +} + +// B — T2 encoding TODO(sonya) +static bool TryDecode16B_T2(Instruction &inst, uint16_t bits) { + inst.category = Instruction::kCategoryError; + return false; +// const B_T2_16 enc = {bits}; +} + +// ------------- 32 Bit Instructions ------------- + +// opc L +// 00 0 SRS, SRSDA, SRSDB, SRSIA, SRSIB — T1 +// 00 1 RFE, RFEDA, RFEDB, RFEIA, RFEIB — T1 +// 01 0 STM, STMIA, STMEA +// 01 1 LDM, LDMIA, LDMFD +// 10 0 STMDB, STMFD +// 10 1 LDMDB, LDMEA +// 11 0 SRS, SRSDA, SRSDB, SRSIA, SRSIB — T2 +// 11 1 RFE, RFEDA, RFEDB, RFEIA, RFEIB — T2 +// Load/Store Multiple TODO(sonya) +// NOTE(sonya): this should become a template probably +// (see TryDecodeLoadStoreMultiple in aarch32. the semantics are identical) +static bool TryDecode32LoadStoreMult(Instruction &inst, uint32_t bits) { + inst.category = Instruction::kCategoryError; + return false; +// const LoadStoreMult32 enc = {bits}; +} + +// BL, BLX (immediate) — T1 TODO(sonya) +static bool TryDecode32BL(Instruction &inst, uint32_t bits) { + inst.category = Instruction::kCategoryError; + return false; +// const BLT1_32 enc = {bits}; +} + +// BL, BLX (immediate) — T2 TODO(sonya) +static bool TryDecode32BLX(Instruction &inst, uint32_t bits) { + inst.category = Instruction::kCategoryError; + return false; +// const BLXT2_32 enc = {bits}; +} + +// ----------------------------------------------- + +// op0 op1 op2 op3 +// 0000 Adjust SP (immediate) +// 0010 Extend +// 0110 00 0 SETPAN (ARMv8.1) +// 0110 00 1 UNALLOCATED +// 0110 01 Change Processor State +// 0110 1x UNALLOCATED +// 0111 UNALLOCATED +// 1000 UNALLOCATED +// 1010 10 HLT +// 1010 != 10 Reverse bytes +// 1110 BKPT +// 1111 0000 Hints +// 1111 != 0000 IT +// x0x1 CBNZ, CBZ +// x10x Push and Pop +static TryDecode16 *TryDecodeMisc16(uint16_t bits) { + const Misc16 enc = {bits}; + + if (!enc.op0) { + return TryDecode16AdjustSPImm; + + // op0 == x0x1 CBNZ, CBZ + } else if ((enc.op0 & 0b0001) && !((enc.op0 << 1) >> 3)) { + return TryDecode16CBZ; + } + + return nullptr; +} + +// op0 +// 00xxxx Shift (immediate), add, subtract, move, and compare +// 010000 Data-processing (two low registers) +// 010001 Special data instructions and branch and exchange +// 01001x LDR (literal) — T1 +// 0101xx Load/store (register offset) +// 011xxx Load/store word/byte (immediate offset) +// 1000xx Load/store halfword (immediate offset) +// 1001xx Load/store (SP-relative) +// 1010xx Add PC/SP (immediate) +// 1011xx Miscellaneous 16-bit instructions +// 1100xx Load/store multiple +// 1101xx Conditional branch, and Supervisor Call +static TryDecode16 *Try16bit(uint16_t bits) { + uint16_t op0 = bits >> 10; + + // The following constraints also apply to this encoding: op0<5:3> != 111 + if ((op0 >> 3) == 0b111) { + return nullptr; + } + + // 00xxxx Shift (immediate), add, subtract, move, and compare + if (!(op0 >> 4)) { + + // op0 op1 op2 + // 0 11 0 Add, subtract (three low registers) + // 0 11 1 Add, subtract (two low registers and immediate) + // 0 != 11 MOV, MOVS (register) — T2 + // 1 Add, subtract, compare, move (one low register and + // immediate) + const ShiftImmAddSubMoveComp16 enc = {bits}; + + if (enc.op0) { + return TryDecode16AddSubComp1LowRegImm; + + } else if (enc.op1 != 0b11) { + return TryDecode16MOVrT2; + + } else if (enc.op2) { + return TryDecode16AddSub2LowRegImm; + + } else { + return TryDecode16AddSub3LowReg; + + } + // 010001 Special data instructions and branch and exchange + } else if (op0 == 0b010001) { + // TODO(sonya): Add, subtract, compare, move (two high registers) + // -- for ADD, ADDS (register) + return nullptr; + + // 011xxx Load/store word/byte (immediate offset) + } else if ((op0 >> 3) == 0b011) { + return kDecode16LoadStoreWordByteImm[(op0 >> 1) & 0b11]; + + // 1001xx Load/store (SP-relative) + } else if ((op0 >> 2) == 0b1001) { + return TryDecode16LoadStoreSPRelative; + + // 1010xx Add PC/SP (immediate) + } else if ((op0 >> 2) == 0b1010) { + return TryDecode16AddPCSP; + + // 1011xx Miscellaneous 16-bit instructions + } else if ((op0 >> 2) == 0b1011) { + return TryDecodeMisc16(bits); + + // 1101xx Conditional branch, and Supervisor Call + } else if ((op0 >> 2) == 0b1101) { + uint16_t _op0 = (bits << 4) >> 9; + + // op0 + // 111x Exception generation + // != 111x B — T1 + if (_op0 == 0b111) { + return nullptr; + } else { + return TryDecode16B_T1; + } + } + + return nullptr; +} + + + +// op0 op1 op2 op3 op4 op5 +// 0 1110 0x 0x0 0 MSR (register) +// 0 1110 0x 0x0 1 MSR (Banked register) +// 0 1110 10 0x0 000 Hints +// 0 1110 10 0x0 != 000 Change processor state +// 0 1110 11 0x0 Miscellaneous system +// 0 1111 00 0x0 BXJ +// 0 1111 01 0x0 Exception return +// 0 1111 1x 0x0 0 MRS +// 0 1111 1x 0x0 1 MRS (Banked register) +// 1 1110 00 000 DCPS +// 1 1110 00 010 UNALLOCATED +// 1 1110 01 0x0 UNALLOCATED +// 1 1110 1x 0x0 UNALLOCATED +// 1 1111 0x 0x0 UNALLOCATED +// 1 1111 1x 0x0 Exception generation +// != 111x 0x0 B — T3 +// 0x1 B — T4 +// 1x0 BL, BLX (immediate) — T2 +// 1x1 BL, BLX (immediate) — T1 +// Branches and miscellaneous control +static TryDecode *TryBranchesMiscControl32(uint32_t bits) { + const BranchesMiscControl32 enc = {bits}; + + if (enc.op3 >> 2) { // op3 == 1xx + if (enc.op3 & 0b001) { // op3 == 1x1 + return TryDecode32BL; + + } else { // // op3 == 1x0 + return TryDecode32BLX; + + } + } + + return nullptr; +} + + +// op0 op1 op3 +// x11x System register access, Advanced SIMD, and +// floating-point +// 0100 xx0xx Load/store multiple +// 0100 xx1xx Load/store dual, load/store exclusive, +// load-acquire/store-release, and table branch +// 0101 Data-processing (shifted register) +// 10xx 1 Branches and miscellaneous control +// 10x0 0 Data-processing (modified immediate) +// 10x1 0 Data-processing (plain binary immediate) +// 1100 1xxx0 Advanced SIMD element or structure load/store +// 1100 != 1xxx0 Load/store single +// 1101 0xxxx Data-processing (register) +// 1101 10xxx Multiply, multiply accumulate, and absolute difference +// 1101 11xxx Long multiply and divide +static TryDecode *Try32Bit(uint32_t bits) { + const Top32bit enc = {bits}; + + // op0 == 0100, op1 == xx0xx, Load/store multiple + if ((enc.op0 == 0b0100) && !(enc.op1 & 0b00100)) { + return TryDecode32LoadStoreMult; + + // op0 == 10xx, op3 == 1, Branches and miscellaneous control + } else if (((enc.op0 >> 2) == 0b10) && enc.op3){ + return TryBranchesMiscControl32(bits); + + } + + return nullptr; +} + +bool DecodeThumb2Instruction(Instruction &inst, uint32_t bits) { + bool ret; + + // op0 op1 + // != 111 16-bit + // 111 00 B — T2 + // 111 != 00 32-bit + // TODO(sonya): make adjustments to inst for a 16 bit increment + { + auto bits16 = uint16_t(bits >> 16); + + // 16-bit instructions + if (bits >> 13 != 0b111) { + inst.next_pc = inst.pc + 2ull; // Default fall-through. + //inst.bytes = inst_bytes; + + auto decoder = Try16bit(bits16); + if (!decoder) { + LOG(ERROR) << "unhandled bits " << std::hex << bits << std::dec; + LOG(ERROR) << "unhandled bits16 " << std::hex << bits16 << std::dec; + return false; + } + ret = decoder(inst, bits16); + + // B — T2 + } else if (!((bits << 3) >> 11)) { + inst.next_pc = inst.pc + 2ull; // Default fall-through. + //inst.bytes = inst_bytes; + + auto decoder = TryDecode16B_T2; + ret = decoder(inst, bits16); + + // 32-bit instructions + } else { + auto decoder = Try32Bit(bits); + + if (!decoder) { + LOG(ERROR) << "unhandled bits " << std::hex << bits << std::dec; + return false; + } + + ret = decoder(inst, bits); + + } + } + + LOG(ERROR) << inst.Serialize(); + return ret; +} + +} // namespace aarch32 + +} // namespace remill + + diff --git a/lib/Arch/AArch32/Runtime/CMakeLists.txt b/lib/Arch/AArch32/Runtime/CMakeLists.txt index 560e8d054..ffcbf23f1 100644 --- a/lib/Arch/AArch32/Runtime/CMakeLists.txt +++ b/lib/Arch/AArch32/Runtime/CMakeLists.txt @@ -64,3 +64,4 @@ function(add_runtime_helper target_name little_endian) endfunction() add_runtime_helper(aarch32 1) +add_runtime_helper(thumb2 1) diff --git a/lib/Arch/AArch32/Semantics/BINARY.cpp b/lib/Arch/AArch32/Semantics/BINARY.cpp index fc5e98db8..f8006365a 100644 --- a/lib/Arch/AArch32/Semantics/BINARY.cpp +++ b/lib/Arch/AArch32/Semantics/BINARY.cpp @@ -94,6 +94,12 @@ DEF_COND_SEM(SUB, R32W dst, R32 src1, I32 src2, R32W maybe_next_pc_dst) { return memory; } +DEF_SEM(SUBL_T2, R32W dst, R32 src1, I32 src2) { + auto value = Read(src2); + Write(dst, USub(Read(src1), value)); + return memory; +} + DEF_COND_SEM(SUBS, R32W dst, R32 src1, I32 src2, I8 carry_out, R32W maybe_next_pc_dst) { auto rhs = Read(src2); @@ -111,6 +117,12 @@ DEF_COND_SEM(ADD, R32W dst, R32 src1, I32 src2, R32W maybe_next_pc_dst) { return memory; } +DEF_SEM(ADDL_T2, R32W dst, R32 src1, I32 src2) { + auto value = Read(src2); + Write(dst, UAdd(Read(src1), value)); + return memory; +} + DEF_COND_SEM(ADDS, R32W dst, R32 src1, I32 src2, I8 carry_out, R32W maybe_next_pc_dst) { auto rhs = Read(src2); @@ -178,12 +190,14 @@ DEF_ISEL(ANDSrr) = ANDS; DEF_ISEL(EORrr) = EOR; DEF_ISEL(EORSrr) = EORS; DEF_ISEL(ADDrr) = ADD; +DEF_ISEL(ADDL_T2) = ADDL_T2; DEF_ISEL(ADDSrr) = ADDS; DEF_ISEL(ADCrr) = ADC; DEF_ISEL(ADCSrr) = ADCS; DEF_ISEL(RSBrr) = RSB; DEF_ISEL(RSBSrr) = RSBS; DEF_ISEL(SUBrr) = SUB; +DEF_ISEL(SUBL_T2) = SUBL_T2; DEF_ISEL(SUBSrr) = SUBS; DEF_ISEL(SBCrr) = SBC; DEF_ISEL(SBCSrr) = SBCS; diff --git a/lib/Arch/AArch32/Semantics/COND.cpp b/lib/Arch/AArch32/Semantics/COND.cpp index 7e298c0af..00f258286 100644 --- a/lib/Arch/AArch32/Semantics/COND.cpp +++ b/lib/Arch/AArch32/Semantics/COND.cpp @@ -44,6 +44,13 @@ DEF_COND_SEM(CMP, R32 src1, I32 src2, I8 carry_out) { return memory; } +DEF_SEM(CMPL_T2, R32 src1, I32 src2, I8 carry_out) { + auto rhs = Read(src2); + auto lhs = Read(src1); + AddWithCarryNZCV(state, lhs, UNot(rhs), uint32_t(1)); + return memory; +} + DEF_COND_SEM(CMN, R32 src1, I32 src2, I8 carry_out) { auto rhs = Read(src2); auto lhs = Read(src1); @@ -56,3 +63,5 @@ DEF_ISEL(TSTr) = TST; DEF_ISEL(TEQr) = TEQ; DEF_ISEL(CMPr) = CMP; DEF_ISEL(CMNr) = CMN; + +DEF_ISEL(CMPL_T2) = CMPL_T2; diff --git a/lib/Arch/AArch32/Semantics/LOGICAL.cpp b/lib/Arch/AArch32/Semantics/LOGICAL.cpp index b6c5617be..09cd32c8c 100644 --- a/lib/Arch/AArch32/Semantics/LOGICAL.cpp +++ b/lib/Arch/AArch32/Semantics/LOGICAL.cpp @@ -82,6 +82,14 @@ DEF_COND_SEM(MOVT, R32W dst, R32 src1, R32 src2) { Write(dst, result); return memory; } + +DEF_SEM(MOVL_T2, R32W dst, I32 src1) { + auto value = Read(src1); + Write(dst, value); + return memory; +} + } // namespace DEF_ISEL(MOVT) = MOVT; +DEF_ISEL(MOVL_T2) = MOVL_T2; diff --git a/lib/Arch/AArch32/Semantics/MEM.cpp b/lib/Arch/AArch32/Semantics/MEM.cpp index bfa1a446c..004368892 100644 --- a/lib/Arch/AArch32/Semantics/MEM.cpp +++ b/lib/Arch/AArch32/Semantics/MEM.cpp @@ -507,3 +507,31 @@ DEF_ISEL(STMIB) = STMDB; DEF_ISEL(LDMIB) = LDM; // DEF_ISEL(LDMe) = LDMe; + +// Thumb2 +namespace { + + +template +DEF_SEM(STR_T2, M dst, R32 src1) { + auto src = TruncTo(Read(src1)); + WriteZExt(dst, src); + + return memory; +} + +template +DEF_SEM(LDR_T2, R32W dst, M src1) { + auto src = Read(src1); + WriteZExt(dst, src); + + return memory; +} +} // namespace + +DEF_ISEL(STR_T2) = STR_T2; +DEF_ISEL(LDR_T2) = LDR_T2; +DEF_ISEL(STRB_T2) = STR_T2; +DEF_ISEL(LDRB_T2) = LDR_T2; + + diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp index 7cd353036..e61b5d6e5 100644 --- a/lib/Arch/Arch.cpp +++ b/lib/Arch/Arch.cpp @@ -56,6 +56,7 @@ static unsigned AddressSize(ArchName arch_name) { case kArchInvalid: LOG(FATAL) << "Cannot get address size for invalid arch."; return 0; + case kArchThumb2LittleEndian: case kArchX86: case kArchX86_AVX: case kArchX86_AVX512: @@ -146,6 +147,12 @@ auto Arch::Build(llvm::LLVMContext *context_, OSName os_name_, break; } + case kArchThumb2LittleEndian: { + DLOG(INFO) << "Using architecture: Thumb2, feature set: Little Endian"; + ret = GetAArch32(context_, os_name_, arch_name_); + break; + } + case kArchAArch32LittleEndian: { DLOG(INFO) << "Using architecture: AArch32, feature set: Little Endian"; ret = GetAArch32(context_, os_name_, arch_name_); @@ -348,7 +355,11 @@ bool Arch::IsAMD64(void) const { } bool Arch::IsAArch32(void) const { - return remill::kArchAArch32LittleEndian == arch_name; + switch (arch_name) { + case remill::kArchAArch32LittleEndian: + case remill::kArchThumb2LittleEndian: return true; + default: return false; + } } bool Arch::IsAArch64(void) const { diff --git a/lib/Arch/Instruction.cpp b/lib/Arch/Instruction.cpp index 12029fdbb..144c66592 100644 --- a/lib/Arch/Instruction.cpp +++ b/lib/Arch/Instruction.cpp @@ -656,6 +656,7 @@ std::string Instruction::Serialize(void) const { case kArchX86: case kArchX86_AVX: case kArchX86_AVX512: ss << "X86"; break; + case kArchThumb2LittleEndian: ss << "Thumb2"; break; case kArchAArch32LittleEndian: ss << "AArch32"; break; case kArchAArch64LittleEndian: ss << "AArch64"; break; case kArchSparc32: ss << "SPARC32"; break; diff --git a/lib/Arch/Name.cpp b/lib/Arch/Name.cpp index ecf3a09b8..14a471ae4 100644 --- a/lib/Arch/Name.cpp +++ b/lib/Arch/Name.cpp @@ -26,7 +26,7 @@ ArchName GetArchName(const llvm::Triple &triple) { case llvm::Triple::ArchType::x86_64: return kArchAMD64; case llvm::Triple::ArchType::aarch64: return kArchAArch64LittleEndian; case llvm::Triple::ArchType::arm: return kArchAArch32LittleEndian; - case llvm::Triple::ArchType::thumb: return kArchAArch32LittleEndian; + case llvm::Triple::ArchType::thumb: return kArchThumb2LittleEndian; case llvm::Triple::sparc: return kArchSparc32; case llvm::Triple::sparcv9: return kArchSparc64; default: return kArchInvalid; @@ -52,6 +52,9 @@ ArchName GetArchName(std::string_view arch_name) { } else if (arch_name == "amd64_avx512") { return kArchAMD64_AVX512; + } else if (arch_name == "thumb2") { + return kArchThumb2LittleEndian; + } else if (arch_name == "aarch32") { return kArchAArch32LittleEndian; @@ -79,6 +82,7 @@ static const std::string_view kArchNames[] = { [kArchAMD64] = "amd64", [kArchAMD64_AVX] = "amd64_avx", [kArchAMD64_AVX512] = "amd64_avx512", + [kArchThumb2LittleEndian] = "thumb2", [kArchAArch32LittleEndian] = "aarch32", [kArchAArch64LittleEndian] = "aarch64", [kArchSparc32] = "sparc32",