Print :abs_gN: prefixes for ARM64 MOVZ/MOVK #20
boppitybop wants to merge 3 commits into GrammaTech:master
Conversation
When a symbolic operand on a MOVZ/MOVK instruction carries G0-G3 attributes, print the appropriate :abs_gN: or :abs_gN_nc: prefix so the assembler emits the correct relocation.
- G3 on MOVZ -> #:abs_g3:sym
- G2 on MOVK -> #:abs_g2_nc:sym
- G1 on MOVK -> #:abs_g1_nc:sym
- G0 on MOVZ -> #:abs_g0:sym / G0 on MOVK -> #:abs_g0_nc:sym

Companion PR: GrammaTech/ddisasm#90
885d249 to
ecd2c09
Compare
junghee
left a comment
There was a problem hiding this comment.
@boppitybop Thank you for the contribution!
I've added some inline review comments -- please take a look and address them when you get a chance.
Also, could you add some test(s) in tests/arm64_syntax_test.py and include an entry in CHANGELOG.md?
Thanks again!
| // Capstone aliases MOVZ as "mov", but the GNU assembler requires "movz" | ||
| // when using :abs_gN: relocation modifiers. Detect the MOVZ encoding from | ||
| // raw instruction bytes and use the canonical mnemonic, but only when the | ||
| // instruction has a symbolic operand with abs_g attributes. | ||
| if (opcode.empty() && inst.size == 4) { | ||
| uint32_t Enc = static_cast<uint32_t>(inst.bytes[0]) | ||
| | (static_cast<uint32_t>(inst.bytes[1]) << 8) | ||
| | (static_cast<uint32_t>(inst.bytes[2]) << 16) | ||
| | (static_cast<uint32_t>(inst.bytes[3]) << 24); | ||
| bool IsMovzEncoding = ((Enc >> 23) & 0x3F) == 0x25 | ||
| && ((Enc >> 29) & 0x3) == 0x2; | ||
| if (IsMovzEncoding) { | ||
| const gtirb::SymbolicExpression* Symex = | ||
| block.getByteInterval()->getSymbolicExpression( | ||
| ea - *block.getByteInterval()->getAddress()); | ||
| if (Symex != nullptr) { | ||
| const gtirb::SymAddrConst* Symaddr = | ||
| this->getSymbolicImmediate(Symex); | ||
| if (Symaddr != nullptr | ||
| && (Symaddr->Attributes.count(gtirb::SymAttribute::G0) | ||
| || Symaddr->Attributes.count(gtirb::SymAttribute::G1) | ||
| || Symaddr->Attributes.count(gtirb::SymAttribute::G2) | ||
| || Symaddr->Attributes.count(gtirb::SymAttribute::G3))) { | ||
| opcode = "movz"; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
There was a problem hiding this comment.
| // Capstone aliases MOVZ as "mov", but the GNU assembler requires "movz" | |
| // when using :abs_gN: relocation modifiers. Detect the MOVZ encoding from | |
| // raw instruction bytes and use the canonical mnemonic, but only when the | |
| // instruction has a symbolic operand with abs_g attributes. | |
| if (opcode.empty() && inst.size == 4) { | |
| uint32_t Enc = static_cast<uint32_t>(inst.bytes[0]) | |
| | (static_cast<uint32_t>(inst.bytes[1]) << 8) | |
| | (static_cast<uint32_t>(inst.bytes[2]) << 16) | |
| | (static_cast<uint32_t>(inst.bytes[3]) << 24); | |
| bool IsMovzEncoding = ((Enc >> 23) & 0x3F) == 0x25 | |
| && ((Enc >> 29) & 0x3) == 0x2; | |
| if (IsMovzEncoding) { | |
| const gtirb::SymbolicExpression* Symex = | |
| block.getByteInterval()->getSymbolicExpression( | |
| ea - *block.getByteInterval()->getAddress()); | |
| if (Symex != nullptr) { | |
| const gtirb::SymAddrConst* Symaddr = | |
| this->getSymbolicImmediate(Symex); | |
| if (Symaddr != nullptr | |
| && (Symaddr->Attributes.count(gtirb::SymAttribute::G0) | |
| || Symaddr->Attributes.count(gtirb::SymAttribute::G1) | |
| || Symaddr->Attributes.count(gtirb::SymAttribute::G2) | |
| || Symaddr->Attributes.count(gtirb::SymAttribute::G3))) { | |
| opcode = "movz"; | |
| } | |
| } | |
| } | |
| } | |
| void Arm64PrettyPrinter::fixupInstruction(const gtirb::CodeBlock& block, cs_insn& inst) { | |
| ElfPrettyPrinter::fixupInstruction(block, inst); | |
| // Capstone aliases MOVZ as "mov", but the GNU assembler requires "movz" when | |
| // :abs_gN: relocation modifiers are used. Assemble the 4 instruction bytes | |
| // (little-endian) into Enc, test bits [28:23] == 0x25 and [30:29] == 0x2 for | |
| // MOVZ, and rewrite the mnemonic only if the symbolic operand carries G0-G3. | |
| if (inst.size == 4) { | |
| uint32_t Enc = static_cast<uint32_t>(inst.bytes[0]) | |
| | (static_cast<uint32_t>(inst.bytes[1]) << 8) | |
| | (static_cast<uint32_t>(inst.bytes[2]) << 16) | |
| | (static_cast<uint32_t>(inst.bytes[3]) << 24); | |
| bool IsMovzEncoding = ((Enc >> 23) & 0x3F) == 0x25 | |
| && ((Enc >> 29) & 0x3) == 0x2; | |
| if (IsMovzEncoding) { | |
| gtirb::Addr ea(inst.address); | |
| const gtirb::SymbolicExpression* Symex = | |
| block.getByteInterval()->getSymbolicExpression( | |
| ea - *block.getByteInterval()->getAddress()); | |
| if (Symex != nullptr) { | |
| const gtirb::SymAddrConst* Symaddr = | |
| this->getSymbolicImmediate(Symex); | |
| if (Symaddr != nullptr | |
| && (Symaddr->Attributes.count(gtirb::SymAttribute::G0) | |
| || Symaddr->Attributes.count(gtirb::SymAttribute::G1) | |
| || Symaddr->Attributes.count(gtirb::SymAttribute::G2) | |
| || Symaddr->Attributes.count(gtirb::SymAttribute::G3))) { | |
| memcpy(inst.mnemonic, "MOVZ", 5); | |
| } | |
| } | |
| } | |
| } | |
| } |
It would be better to utilize fixupInstruction in the base class.
Currently, it only takes cs_insn& inst, but we could extend it to also take the block:
- virtual void fixupInstruction(cs_insn& inst);
+ virtual void fixupInstruction(const gtirb::CodeBlock& block, cs_insn& inst);
For consistency, the overriding functions could then be updated accordingly:
-void AttPrettyPrinter::fixupInstruction(cs_insn& Insn) {
+void AttPrettyPrinter::fixupInstruction(const gtirb::CodeBlock& block,
+ cs_insn& Insn) {
+ PrettyPrinterBase::fixupInstruction(block, Insn);
+
|
@junghee The latest commit should address all the comments. Please let me know if there is anything else when you get a chance to review again. Thanks!
Hi @boppitybop, the changes are currently under internal review and should be merged soon. Thank you again for your contribution!
Companion to GrammaTech/ddisasm#90.