From 56a9ff0fc8c77fc9b3f67d02f15385fe41ec18de Mon Sep 17 00:00:00 2001 From: Alluseri <42897348+Alluseri@users.noreply.github.com> Date: Mon, 2 Sep 2024 09:43:13 +0500 Subject: [PATCH] Core: Implement the x64 calling convention resolver (msvc/gcc) --- .../InstructionSets/X86InstructionSet.cs | 144 +++++---- .../Utils/X64CallingConventionResolver.cs | 306 ++++++++++++++++++ 2 files changed, 386 insertions(+), 64 deletions(-) create mode 100644 Cpp2IL.Core/Utils/X64CallingConventionResolver.cs diff --git a/Cpp2IL.Core/InstructionSets/X86InstructionSet.cs b/Cpp2IL.Core/InstructionSets/X86InstructionSet.cs index d3b97b17..bdba6d6f 100644 --- a/Cpp2IL.Core/InstructionSets/X86InstructionSet.cs +++ b/Cpp2IL.Core/InstructionSets/X86InstructionSet.cs @@ -5,12 +5,15 @@ using Cpp2IL.Core.Extensions; using Cpp2IL.Core.Il2CppApiFunctions; using Cpp2IL.Core.ISIL; +using Cpp2IL.Core.Logging; using Cpp2IL.Core.Model.Contexts; using Cpp2IL.Core.Utils; using Iced.Intel; +using LibCpp2IL.BinaryStructures; namespace Cpp2IL.Core.InstructionSets; +// This is honestly an X64InstructionSet by all means. Everything here screams "I AM X64". 
public class X86InstructionSet : Cpp2IlInstructionSet { private static readonly MasmFormatter Formatter = new(); @@ -61,20 +64,20 @@ public override List GetIsilFromMethod(Met private void ConvertInstructionStatement(Instruction instruction, IsilBuilder builder, MethodAnalysisContext context) { - // var callNoReturn = false; // stub, see case Mnemonic.Call + var callNoReturn = false; switch (instruction.Mnemonic) { case Mnemonic.Mov: - case Mnemonic.Movzx: //For all intents and purposes we don't care about zero-extending - case Mnemonic.Movaps: //Movaps is basically just a mov but with the potential future detail that the size is dependent on reg size - case Mnemonic.Movups: //Movaps but unaligned - case Mnemonic.Movss: //Same as movaps but for floats - case Mnemonic.Movd: //Mov but specifically dword - case Mnemonic.Movq: //Mov but specifically qword - case Mnemonic.Movsd: //Mov but specifically double - case Mnemonic.Movdqa: //Movaps but multiple integers at once in theory - case Mnemonic.Cvtdq2ps: //Technically a convert double to single, but for analysis purposes we can just treat it as a move + case Mnemonic.Movzx: // For all intents and purposes we don't care about zero-extending + case Mnemonic.Movaps: // Movaps is basically just a mov but with the potential future detail that the size is dependent on reg size + case Mnemonic.Movups: // Movaps but unaligned + case Mnemonic.Movss: // Same as movaps but for floats + case Mnemonic.Movd: // Mov but specifically dword + case Mnemonic.Movq: // Mov but specifically qword + case Mnemonic.Movsd: // Mov but specifically double + case Mnemonic.Movdqa: // Movaps but multiple integers at once in theory + case Mnemonic.Cvtdq2ps: // Technically a convert double to single, but for analysis purposes we can just treat it as a move builder.Move(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 1)); break; case Mnemonic.Lea: @@ -107,8 +110,8 @@ private void ConvertInstructionStatement(Instruction 
instruction, IsilBuilder bu case Mnemonic.Imul: if (instruction.OpCount == 1) { - int OpSize = instruction.Op0Kind == OpKind.Register ? instruction.Op0Register.GetSize() : instruction.MemorySize.GetSize(); - switch (OpSize) // TODO I don't know how to work with dual registers here in Iced, I left hints though + int opSize = instruction.Op0Kind == OpKind.Register ? instruction.Op0Register.GetSize() : instruction.MemorySize.GetSize(); + switch (opSize) // TODO: I don't know how to work with dual registers here, I left hints though { case 1: // Op0 * AL -> AX builder.Multiply(instruction.IP, Register.AX.MakeIndependent(), ConvertOperand(instruction, 0), Register.AL.MakeIndependent()); @@ -145,10 +148,18 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu break; case Mnemonic.Ret: + // TODO: Verify correctness of operation with Vectors. + + // On x32, this will require better engineering since ulongs are handled somehow differently (return in 2 registers, I think?) + // The x64 prototype should work. + // Are st* registers even used in il2cpp games? + if (context.IsVoid) builder.Return(instruction.IP); + else if (context.Definition?.RawReturnType?.Type is Il2CppTypeEnum.IL2CPP_TYPE_R4 or Il2CppTypeEnum.IL2CPP_TYPE_R8) + builder.Return(instruction.IP, InstructionSetIndependentOperand.MakeRegister("xmm0")); else - builder.Return(instruction.IP, InstructionSetIndependentOperand.MakeRegister("rax")); //TODO Support xmm0 + builder.Return(instruction.IP, InstructionSetIndependentOperand.MakeRegister("rax")); break; case Mnemonic.Push: //var operandSize = instruction.Op0Kind == OpKind.Register ? 
instruction.Op0Register.GetSize() : instruction.MemorySize.GetSize(); @@ -164,7 +175,7 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu case Mnemonic.Add: var isSubtract = instruction.Mnemonic == Mnemonic.Sub; - //Special case - stack shift + // Special case - stack shift if (instruction.Op0Register == Register.RSP && instruction.Op1Kind.IsImmediate()) { var amount = (int)instruction.GetImmediate(1); @@ -182,8 +193,8 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu break; case Mnemonic.Addss: case Mnemonic.Subss: - //Addss and subss are just floating point add/sub, but we don't need to handle the stack stuff - //But we do need to handle 2 vs 3 operand forms + // Addss and subss are just floating point add/sub, but we don't need to handle the stack stuff + // But we do need to handle 2 vs 3 operand forms InstructionSetIndependentOperand dest; InstructionSetIndependentOperand src1; InstructionSetIndependentOperand src2; @@ -210,64 +221,69 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu else builder.Add(instruction.IP, dest, src1, src2); break; + // The following pair of instructions does not update the Carry Flag (CF): case Mnemonic.Dec: + builder.Subtract(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 0), InstructionSetIndependentOperand.MakeImmediate(1)); + break; case Mnemonic.Inc: - // no CF - var isDec = instruction.Mnemonic == Mnemonic.Dec; - var im = InstructionSetIndependentOperand.MakeImmediate(1); - if (isDec) builder.Subtract(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 0), im); - else builder.Add(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 0), im); + builder.Add(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 0), InstructionSetIndependentOperand.MakeImmediate(1)); break; case Mnemonic.Call: // We don't try and resolve which method is 
being called, but we do need to know how many parameters it has // I would hope that all of these methods have the same number of arguments, else how can they be inlined? - // TODO: Handle CallNoReturn(I have no idea how due to instructionAddress constantly being a limitation) + var target = instruction.NearBranchTarget; - if (context.AppContext.MethodsByAddress.ContainsKey(target)) + + if (context.AppContext.MethodsByAddress.TryGetValue(target, out var possibleMethods)) { - var possibleMethods = context.AppContext.MethodsByAddress[target]; - var parameterCounts = possibleMethods.Select(p => + if (possibleMethods.Count == 1) { - var ret = p.Parameters.Count; - if (!p.IsStatic) - ret++; //This arg - - ret++; //For MethodInfo arg - return ret; - }); - - // if (parameterCounts.Max() != parameterCounts.Min()) - // throw new("Cannot handle call to address with multiple managed methods of different parameter counts"); - - var parameterCount = parameterCounts.Max(); - var registerParams = new[] { "rcx", "rdx", "r8", "r9" }.Select(InstructionSetIndependentOperand.MakeRegister).ToList(); - - if (parameterCount <= registerParams.Count) + builder.Call(instruction.IP, target, X64CallingConventionResolver.ResolveForManaged(possibleMethods[0])); + } + else { - builder.Call(instruction.IP, target, registerParams.GetRange(0, parameterCount).ToArray()); - return; + MethodAnalysisContext ctx = null!; + var lpars = -1; + + // Very naive approach, folds with structs in parameters if GCC is used: + foreach (var method in possibleMethods) + { + var pars = method.ParameterCount; + if (method.IsStatic) pars++; + if (pars > lpars) + { + lpars = pars; + ctx = method; + } + } + + // On post-analysis, you can discard methods according to the registers used, see X64CallingConventionResolver. + // This is less effective on GCC because MSVC doesn't overlap registers. 
+ + builder.Call(instruction.IP, target, X64CallingConventionResolver.ResolveForManaged(ctx)); } + } + else + { + // This isn't a managed method, so for now we don't know its parameter count. + // This will need to be rewritten if we ever stumble upon an unmanaged method that accepts more than 4 parameters. + // These can be converted to dedicated ISIL instructions for specific API functions at a later stage. (by a post-processing step) - //Need to use stack - parameterCount -= registerParams.Count; //Subtract the 4 params we can fit in registers + builder.Call(instruction.IP, target, X64CallingConventionResolver.ResolveForUnmanaged(context.AppContext, target)); + } - //Generate and append stack operands - var ptrSize = (int)context.AppContext.Binary.PointerSize; - registerParams = registerParams.Concat(Enumerable.Range(0, parameterCount).Select(p => p * ptrSize).Select(InstructionSetIndependentOperand.MakeStack)).ToList(); + if (callNoReturn) + { + // Our function decided to jump into a thunk or do a funny return. + // We will insert a return after the call. + // According to common sense, such callee must have the same return value as the caller, unless it's __noreturn. + // I hope someone else will catch up on this and figure out non-returning functions. - builder.Call(instruction.IP, target, registerParams.ToArray()); + // TODO: Determine whether a function is an actual thunk and it's *technically better* to duplicate code for it, or if it's a regular retcall. + // Basic implementation may use context.AppContext.MethodsByAddress, but this doesn't catch thunks only. + // For example, SWDT often calls gc::GarbageCollector::SetWriteBarrier through a long jmp chain. That's a whole function, not just a thunk. - //Discard the consumed stack space - builder.ShiftStack(instruction.IP, -parameterCount * 8); - } - else - { - //This isn't a managed method, so for now we don't know its parameter count. - //Add all four of the registers, I guess. 
If there are any functions that take more than 4 params, - //we'll have to do something else here. - //These can be converted to dedicated ISIL instructions for specific API functions at a later stage. (by a post-processing step) - var paramRegisters = new[] { "rcx", "rdx", "r8", "r9" }.Select(InstructionSetIndependentOperand.MakeRegister).ToArray(); - builder.Call(instruction.IP, target, paramRegisters); + goto case Mnemonic.Ret; } break; @@ -294,8 +310,8 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu if (jumpTarget < methodStart || jumpTarget > methodEnd) { - // callNoReturn = true; - goto case Mnemonic.Call; // This is like 99% likely a non returning call, jump to case to avoid code duplication + callNoReturn = true; + goto case Mnemonic.Call; } else { @@ -375,11 +391,11 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu break; case Mnemonic.Int: case Mnemonic.Int3: - builder.Interrupt(instruction.IP); // We'll add it but eliminate later + builder.Interrupt(instruction.IP); // We'll add it but eliminate later, can be used as a hint since compilers only emit it in normally unreachable code or in error handlers break; case Mnemonic.Nop: - //While this is literally a nop and there's in theory no point emitting anything for it, it could be used as a jump target. - //So we'll emit an ISIL nop for it. + // While this is literally a nop and there's in theory no point emitting anything for it, it could be used as a jump target. + // So we'll emit an ISIL nop for it. 
builder.Nop(instruction.IP); break; default:
diff --git a/Cpp2IL.Core/Utils/X64CallingConventionResolver.cs b/Cpp2IL.Core/Utils/X64CallingConventionResolver.cs
new file mode 100644
index 00000000..3568f2f7
--- /dev/null
+++ b/Cpp2IL.Core/Utils/X64CallingConventionResolver.cs
@@ -0,0 +1,336 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Cpp2IL.Core.ISIL;
+using Cpp2IL.Core.Model.Contexts;
+using LibCpp2IL.BinaryStructures;
+using LibCpp2IL.PE;
+
+namespace Cpp2IL.Core.Utils;
+
+#pragma warning disable IDE0305, IDE0300
+
+/// <summary>
+/// Maps call arguments onto the registers and stack slots of the x64 calling conventions:
+/// the Microsoft layout for PE binaries and the GCC (Linux) layout for every other binary.
+/// </summary>
+public static class X64CallingConventionResolver
+{
+    // TODO: GCC(Linux) ABI
+
+    // This class must be used in good faith.
+    // If that's not possible, uncomment the binary type checks.
+    // This *will* break everything on x32.
+
+    // Size in bytes of one argument slot; every stack offset produced here is a multiple of it.
+    private const int PtrSize = 8;
+
+    // True when the parameter is an R4 or R8 (float or double), which is routed to an xmm register below.
+    private static bool IsXMM(ParameterAnalysisContext par) => par.ParameterType.Type is Il2CppTypeEnum.IL2CPP_TYPE_R4 or Il2CppTypeEnum.IL2CPP_TYPE_R8;
+
+    /// <summary>
+    /// Returns the operands assumed to carry the arguments of a call to an unmanaged function.
+    /// </summary>
+    /// <param name="app">Application context; its binary type selects the calling convention.</param>
+    /// <param name="target">Address of the callee. Currently unused.</param>
+    /// <returns>Every integer argument register of the selected convention, in argument order.</returns>
+    public static InstructionSetIndependentOperand[] ResolveForUnmanaged(ApplicationAnalysisContext app, ulong target)
+    {
+        // This is mostly a stub and may be extended in the future. You can traverse exports here for example.
+        // For now, we'll return all normal registers and omit the floating point registers.
+
+        return app.Binary is PE ? new[] {
+            ToOperand(MicrosoftNormalRegister.rcx),
+            ToOperand(MicrosoftNormalRegister.rdx),
+            ToOperand(MicrosoftNormalRegister.r8),
+            ToOperand(MicrosoftNormalRegister.r9)
+        } : new[] {
+            ToOperand(LinuxNormalRegister.rdi),
+            ToOperand(LinuxNormalRegister.rsi),
+            ToOperand(LinuxNormalRegister.rdx),
+            ToOperand(LinuxNormalRegister.rcx),
+            ToOperand(LinuxNormalRegister.r8),
+            ToOperand(LinuxNormalRegister.r9)
+        };
+    }
+
+    /// <summary>
+    /// Returns the operands that carry the arguments of a call to a managed method, in argument order:
+    /// the optional <c>this</c> pointer, the declared parameters, then the trailing MethodInfo argument.
+    /// </summary>
+    /// <param name="ctx">The method being called.</param>
+    /// <returns>One register or stack operand per argument.</returns>
+    public static InstructionSetIndependentOperand[] ResolveForManaged(MethodAnalysisContext ctx)
+    {
+        // if (ctx.AppContext.Binary.is32Bit)
+        //     throw new NotSupportedException("Resolution of 64-bit calling conventions in 32-bit binaries is not supported.");
+
+        List<InstructionSetIndependentOperand> args = new();
+
+        var addThis = !ctx.IsStatic;
+        var isReturningAnOversizedStructure = false; // TODO: Determine whether we return a structure and whether that structure is oversized.
+
+        /*
+        GCC:
+        Small structures - packed into N registers:
+        {
+            int x;
+            int y;
+        }
+        will be packed as a single normal register
+
+        Big structures - packed into N registers:
+        {
+            int x;
+            int y;
+            int z;
+            int w;
+        }
+        will be packed as two normal registers
+
+        Large structures - always in the stack:
+        {
+            int x;
+            int y;
+            int z;
+            int w;
+            int kk;
+            int mm;
+        }
+        will be packed into the stack
+
+        Small XMM structures - packed into NX registers:
+        {
+            int x;
+            double y;
+        }
+        x will be packed into normal register, y will be packed into fp register, they do overlap (xmm0 and rdi are used)
+
+        Fit XMM structures - packed into X registers:
+        {
+            double x;
+            double y;
+        }
+        will be packed as 2 xmm registers
+
+        Big XMM structures - always in the stack:
+        {
+            double x;
+            double y;
+            int z;
+        }
+        will be packed into the stack, even though technically you could pack it into 1 normal and 2 xmm registers (same goes for if the int is a double)
+
+        Small float structures - packed into N registers:
+        {
+            float x;
+            int y;
+        }
+        x and y will be packed into a single normal register
+
+        Fit float structures - packed into XN registers:
+        {
+            float x;
+            float y;
+            int z;
+        }
+        x and y will be packed into a single fp register(doesn't match int behavior!!!), z will be packed into a normal register
+
+        Complete float structures - packed into X registers:
+        {
+            float x;
+            float y;
+            float z;
+            float w;
+        }
+        x,y and z,w will be packed into 2 fp registers
+
+        Everything else is always in the stack.
+        Multiple structures in args also follow this rule.
+        16-byte structures will be put into the stack after the 8th structure. The others stay in registers according to spec.
+        32-byte structures will be put into the stack after the 4th structure. The others stay in registers according to spec.
+        Structure sizes above are determined by their register size(16-byte fits into one R*X, 32-byte fits into two R*X, no matter the actual size).
+        The structures don't get cross-packed in the registers, which means they can't overlap, even if possible on a bit level.
+        Check .IsRef and pray it's true (don't need to handle struct fields individually, it's a pointer)
+        */
+
+        /*
+        MSVC doesn't need any special code to be implemented.
+        */
+
+        if (ctx.AppContext.Binary is PE)
+        {
+            /*
+            MSVC cconv:
+            RCX = XMM0
+            RDX = XMM1
+            R8 = XMM2
+            R9 = XMM3
+            [stack, every field is 8 incl. f & d, uses mov]
+            */
+
+            // Index of the next argument slot. Integer and floating-point registers share one numbering here,
+            // so this counter is deliberately kept separate from the position inside ctx.Parameters.
+            var slot = 0;
+
+            void AddParameter(ParameterAnalysisContext? par)
+            {
+                if (slot < 4)
+                {
+                    // Only xmm0-xmm3 are reachable here (slot < 4); the Linux enum is reused for their names.
+                    args.Add((par != null && IsXMM(par)) ? ToOperand(LinuxFloatingRegister.xmm0 + slot) : ToOperand(MicrosoftNormalRegister.rcx + slot));
+                }
+                else
+                {
+                    args.Add(InstructionSetIndependentOperand.MakeStack((slot - 4) * PtrSize));
+                }
+
+                slot++;
+            }
+
+            if (isReturningAnOversizedStructure)
+            {
+                AddParameter(null); // Slot reserved when an oversized structure is returned
+            }
+
+            if (addThis)
+            {
+                AddParameter(null); // The this pointer
+            }
+
+            // Parameters are enumerated independently of the slot counter: for an instance method the first
+            // declared parameter lands in slot 1, so the slot index must never be used to index ctx.Parameters.
+            foreach (var par in ctx.Parameters)
+            {
+                AddParameter(par);
+            }
+
+            AddParameter(null); // The MethodInfo argument
+        }
+        else // if (ctx.AppContext.Binary is ElfFile)
+        {
+            /*
+            GCC cconv (-O2):
+            Integers & Longs:
+            rdi
+            rsi
+            rdx
+            rcx
+            r8
+            r9
+            [stack, uses push]
+            Doubles:
+            xmm0
+            xmm1
+            xmm2
+            xmm3
+            xmm4
+            xmm5
+            xmm6
+            xmm7
+            [stack, uses push]
+            */
+
+            // Integer and floating-point registers are handed out independently; both kinds spill to one stack offset.
+            LinuxNormalRegister nreg = 0;
+            LinuxFloatingRegister freg = 0;
+            var stack = 0;
+
+            void AddParameter(ParameterAnalysisContext? par)
+            {
+                if (par != null && IsXMM(par))
+                {
+                    if (freg == LinuxFloatingRegister.Stack)
+                    {
+                        args.Add(InstructionSetIndependentOperand.MakeStack(stack));
+                        stack += PtrSize;
+                    }
+                    else
+                    {
+                        args.Add(ToOperand(freg++));
+                    }
+                }
+                else
+                {
+                    if (nreg == LinuxNormalRegister.Stack)
+                    {
+                        args.Add(InstructionSetIndependentOperand.MakeStack(stack));
+                        stack += PtrSize;
+                    }
+                    else
+                    {
+                        args.Add(ToOperand(nreg++));
+                    }
+                }
+            }
+
+            if (isReturningAnOversizedStructure)
+            {
+                args.Add(ToOperand(nreg++));
+            }
+
+            if (addThis)
+            {
+                args.Add(ToOperand(nreg++));
+            }
+
+            foreach (var par in ctx.Parameters)
+            {
+                AddParameter(par);
+            }
+
+            AddParameter(null); // The MethodInfo argument
+        }
+        // else throw new NotSupportedException($"Resolution of 64-bit calling conventions is not supported for this binary type.");
+
+        return args.ToArray();
+    }
+
+    private static InstructionSetIndependentOperand ToOperand(MicrosoftNormalRegister reg) => reg switch
+    {
+        MicrosoftNormalRegister.rcx => InstructionSetIndependentOperand.MakeRegister("rcx"),
+        MicrosoftNormalRegister.rdx => InstructionSetIndependentOperand.MakeRegister("rdx"),
+        MicrosoftNormalRegister.r8 => InstructionSetIndependentOperand.MakeRegister("r8"),
+        MicrosoftNormalRegister.r9 => InstructionSetIndependentOperand.MakeRegister("r9"),
+        _ => throw new InvalidOperationException("Went past the register limit during resolution.")
+    };
+
+    private static InstructionSetIndependentOperand ToOperand(LinuxNormalRegister reg) => reg switch
+    {
+        LinuxNormalRegister.rdi => InstructionSetIndependentOperand.MakeRegister("rdi"),
+        LinuxNormalRegister.rsi => InstructionSetIndependentOperand.MakeRegister("rsi"),
+        LinuxNormalRegister.rdx => InstructionSetIndependentOperand.MakeRegister("rdx"),
+        LinuxNormalRegister.rcx => InstructionSetIndependentOperand.MakeRegister("rcx"),
+        LinuxNormalRegister.r8 => InstructionSetIndependentOperand.MakeRegister("r8"),
+        LinuxNormalRegister.r9 => InstructionSetIndependentOperand.MakeRegister("r9"),
+        _ => throw new InvalidOperationException("Went past the register limit during resolution.")
+    };
+
+    private static InstructionSetIndependentOperand ToOperand(LinuxFloatingRegister reg) => reg switch
+    {
+        LinuxFloatingRegister.xmm0 => InstructionSetIndependentOperand.MakeRegister("xmm0"),
+        LinuxFloatingRegister.xmm1 => InstructionSetIndependentOperand.MakeRegister("xmm1"),
+        LinuxFloatingRegister.xmm2 => InstructionSetIndependentOperand.MakeRegister("xmm2"),
+        LinuxFloatingRegister.xmm3 => InstructionSetIndependentOperand.MakeRegister("xmm3"),
+        LinuxFloatingRegister.xmm4 => InstructionSetIndependentOperand.MakeRegister("xmm4"),
+        LinuxFloatingRegister.xmm5 => InstructionSetIndependentOperand.MakeRegister("xmm5"),
+        LinuxFloatingRegister.xmm6 => InstructionSetIndependentOperand.MakeRegister("xmm6"),
+        LinuxFloatingRegister.xmm7 => InstructionSetIndependentOperand.MakeRegister("xmm7"),
+        _ => throw new InvalidOperationException("Went past the register limit during resolution.")
+    };
+
+    private enum MicrosoftNormalRegister
+    {
+        rcx, rdx, r8, r9
+    }
+
+    private enum LinuxNormalRegister
+    {
+        rdi, rsi, rdx, rcx, r8, r9, Stack
+    }
+
+    private enum LinuxFloatingRegister
+    {
+        xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, Stack
+    }
+}