Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core: Implement the x64 calling convention resolver (msvc/gcc) #333

Merged
merged 1 commit into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 80 additions & 64 deletions Cpp2IL.Core/InstructionSets/X86InstructionSet.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@
using Cpp2IL.Core.Extensions;
using Cpp2IL.Core.Il2CppApiFunctions;
using Cpp2IL.Core.ISIL;
using Cpp2IL.Core.Logging;
using Cpp2IL.Core.Model.Contexts;
using Cpp2IL.Core.Utils;
using Iced.Intel;
using LibCpp2IL.BinaryStructures;

namespace Cpp2IL.Core.InstructionSets;

// This is honestly an X64InstructionSet by all means. Everything here screams "I AM X64".
public class X86InstructionSet : Cpp2IlInstructionSet
{
private static readonly MasmFormatter Formatter = new();
Expand Down Expand Up @@ -61,20 +64,20 @@ public override List<InstructionSetIndependentInstruction> GetIsilFromMethod(Met

private void ConvertInstructionStatement(Instruction instruction, IsilBuilder builder, MethodAnalysisContext context)
{
// var callNoReturn = false; // stub, see case Mnemonic.Call
var callNoReturn = false;

switch (instruction.Mnemonic)
{
case Mnemonic.Mov:
case Mnemonic.Movzx: //For all intents and purposes we don't care about zero-extending
case Mnemonic.Movaps: //Movaps is basically just a mov but with the potential future detail that the size is dependent on reg size
case Mnemonic.Movups: //Movaps but unaligned
case Mnemonic.Movss: //Same as movaps but for floats
case Mnemonic.Movd: //Mov but specifically dword
case Mnemonic.Movq: //Mov but specifically qword
case Mnemonic.Movsd: //Mov but specifically double
case Mnemonic.Movdqa: //Movaps but multiple integers at once in theory
case Mnemonic.Cvtdq2ps: //Technically a convert double to single, but for analysis purposes we can just treat it as a move
case Mnemonic.Movzx: // For all intents and purposes we don't care about zero-extending
case Mnemonic.Movaps: // Movaps is basically just a mov but with the potential future detail that the size is dependent on reg size
case Mnemonic.Movups: // Movaps but unaligned
case Mnemonic.Movss: // Same as movaps but for floats
case Mnemonic.Movd: // Mov but specifically dword
case Mnemonic.Movq: // Mov but specifically qword
case Mnemonic.Movsd: // Mov but specifically double
case Mnemonic.Movdqa: // Movaps but multiple integers at once in theory
case Mnemonic.Cvtdq2ps: // Technically a convert double to single, but for analysis purposes we can just treat it as a move
builder.Move(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 1));
break;
case Mnemonic.Lea:
Expand Down Expand Up @@ -107,8 +110,8 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu
case Mnemonic.Imul:
if (instruction.OpCount == 1)
{
int OpSize = instruction.Op0Kind == OpKind.Register ? instruction.Op0Register.GetSize() : instruction.MemorySize.GetSize();
switch (OpSize) // TODO I don't know how to work with dual registers here in Iced, I left hints though
int opSize = instruction.Op0Kind == OpKind.Register ? instruction.Op0Register.GetSize() : instruction.MemorySize.GetSize();
switch (opSize) // TODO: I don't know how to work with dual registers here, I left hints though
{
case 1: // Op0 * AL -> AX
builder.Multiply(instruction.IP, Register.AX.MakeIndependent(), ConvertOperand(instruction, 0), Register.AL.MakeIndependent());
Expand Down Expand Up @@ -145,10 +148,18 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu

break;
case Mnemonic.Ret:
// TODO: Verify correctness of operation with Vectors.

// On x32, this will require better engineering since ulongs are handled somehow differently (return in 2 registers, I think?)
// The x64 prototype should work.
// Are st* registers even used in il2cpp games?

if (context.IsVoid)
builder.Return(instruction.IP);
else if (context.Definition?.RawReturnType?.Type is Il2CppTypeEnum.IL2CPP_TYPE_R4 or Il2CppTypeEnum.IL2CPP_TYPE_R8)
builder.Return(instruction.IP, InstructionSetIndependentOperand.MakeRegister("xmm0"));
else
builder.Return(instruction.IP, InstructionSetIndependentOperand.MakeRegister("rax")); //TODO Support xmm0
builder.Return(instruction.IP, InstructionSetIndependentOperand.MakeRegister("rax"));
break;
case Mnemonic.Push:
//var operandSize = instruction.Op0Kind == OpKind.Register ? instruction.Op0Register.GetSize() : instruction.MemorySize.GetSize();
Expand All @@ -164,7 +175,7 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu
case Mnemonic.Add:
var isSubtract = instruction.Mnemonic == Mnemonic.Sub;

//Special case - stack shift
// Special case - stack shift
if (instruction.Op0Register == Register.RSP && instruction.Op1Kind.IsImmediate())
{
var amount = (int)instruction.GetImmediate(1);
Expand All @@ -182,8 +193,8 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu
break;
case Mnemonic.Addss:
case Mnemonic.Subss:
//Addss and subss are just floating point add/sub, but we don't need to handle the stack stuff
//But we do need to handle 2 vs 3 operand forms
// Addss and subss are just floating point add/sub, but we don't need to handle the stack stuff
// But we do need to handle 2 vs 3 operand forms
InstructionSetIndependentOperand dest;
InstructionSetIndependentOperand src1;
InstructionSetIndependentOperand src2;
Expand All @@ -210,64 +221,69 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu
else
builder.Add(instruction.IP, dest, src1, src2);
break;
// The following pair of instructions does not update the Carry Flag (CF):
case Mnemonic.Dec:
builder.Subtract(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 0), InstructionSetIndependentOperand.MakeImmediate(1));
break;
case Mnemonic.Inc:
// no CF
var isDec = instruction.Mnemonic == Mnemonic.Dec;
var im = InstructionSetIndependentOperand.MakeImmediate(1);
if (isDec) builder.Subtract(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 0), im);
else builder.Add(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 0), im);
builder.Add(instruction.IP, ConvertOperand(instruction, 0), ConvertOperand(instruction, 0), InstructionSetIndependentOperand.MakeImmediate(1));
break;
case Mnemonic.Call:
// We don't try and resolve which method is being called, but we do need to know how many parameters it has
// I would hope that all of these methods have the same number of arguments, else how can they be inlined?
// TODO: Handle CallNoReturn(I have no idea how due to instructionAddress constantly being a limitation)

var target = instruction.NearBranchTarget;
if (context.AppContext.MethodsByAddress.ContainsKey(target))

if (context.AppContext.MethodsByAddress.TryGetValue(target, out var possibleMethods))
{
var possibleMethods = context.AppContext.MethodsByAddress[target];
var parameterCounts = possibleMethods.Select(p =>
if (possibleMethods.Count == 1)
{
var ret = p.Parameters.Count;
if (!p.IsStatic)
ret++; //This arg

ret++; //For MethodInfo arg
return ret;
});

// if (parameterCounts.Max() != parameterCounts.Min())
// throw new("Cannot handle call to address with multiple managed methods of different parameter counts");

var parameterCount = parameterCounts.Max();
var registerParams = new[] { "rcx", "rdx", "r8", "r9" }.Select(InstructionSetIndependentOperand.MakeRegister).ToList();

if (parameterCount <= registerParams.Count)
builder.Call(instruction.IP, target, X64CallingConventionResolver.ResolveForManaged(possibleMethods[0]));
}
else
{
builder.Call(instruction.IP, target, registerParams.GetRange(0, parameterCount).ToArray());
return;
MethodAnalysisContext ctx = null!;
var lpars = -1;

// Very naive approach, folds with structs in parameters if GCC is used:
foreach (var method in possibleMethods)
{
var pars = method.ParameterCount;
if (method.IsStatic) pars++;
if (pars > lpars)
{
lpars = pars;
ctx = method;
}
}

// On post-analysis, you can discard methods according to the registers used, see X64CallingConventionResolver.
// This is less effective on GCC because MSVC doesn't overlap registers.

builder.Call(instruction.IP, target, X64CallingConventionResolver.ResolveForManaged(ctx));
}
}
else
{
// This isn't a managed method, so for now we don't know its parameter count.
// This will need to be rewritten if we ever stumble upon an unmanaged method that accepts more than 4 parameters.
// These can be converted to dedicated ISIL instructions for specific API functions at a later stage. (by a post-processing step)

//Need to use stack
parameterCount -= registerParams.Count; //Subtract the 4 params we can fit in registers
builder.Call(instruction.IP, target, X64CallingConventionResolver.ResolveForUnmanaged(context.AppContext, target));
}

//Generate and append stack operands
var ptrSize = (int)context.AppContext.Binary.PointerSize;
registerParams = registerParams.Concat(Enumerable.Range(0, parameterCount).Select(p => p * ptrSize).Select(InstructionSetIndependentOperand.MakeStack)).ToList();
if (callNoReturn)
{
// Our function decided to jump into a thunk or do a funny return.
// We will insert a return after the call.
// According to common sense, such callee must have the same return value as the caller, unless it's __noreturn.
// I hope someone else will catch up on this and figure out non-returning functions.

builder.Call(instruction.IP, target, registerParams.ToArray());
// TODO: Determine whether a function is an actual thunk and it's *technically better* to duplicate code for it, or if it's a regular retcall.
// Basic implementation may use context.AppContext.MethodsByAddress, but this doesn't catch thunks only.
// For example, SWDT often calls gc::GarbageCollector::SetWriteBarrier through a long jmp chain. That's a whole function, not just a thunk.

//Discard the consumed stack space
builder.ShiftStack(instruction.IP, -parameterCount * 8);
}
else
{
//This isn't a managed method, so for now we don't know its parameter count.
//Add all four of the registers, I guess. If there are any functions that take more than 4 params,
//we'll have to do something else here.
//These can be converted to dedicated ISIL instructions for specific API functions at a later stage. (by a post-processing step)
var paramRegisters = new[] { "rcx", "rdx", "r8", "r9" }.Select(InstructionSetIndependentOperand.MakeRegister).ToArray();
builder.Call(instruction.IP, target, paramRegisters);
goto case Mnemonic.Ret;
}

break;
Expand All @@ -294,8 +310,8 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu

if (jumpTarget < methodStart || jumpTarget > methodEnd)
{
// callNoReturn = true;
goto case Mnemonic.Call; // This is like 99% likely a non returning call, jump to case to avoid code duplication
callNoReturn = true;
goto case Mnemonic.Call;
}
else
{
Expand Down Expand Up @@ -375,11 +391,11 @@ private void ConvertInstructionStatement(Instruction instruction, IsilBuilder bu
break;
case Mnemonic.Int:
case Mnemonic.Int3:
builder.Interrupt(instruction.IP); // We'll add it but eliminate later
builder.Interrupt(instruction.IP); // We'll add it but eliminate later, can be used as a hint since compilers only emit it in normally unreachable code or in error handlers
break;
case Mnemonic.Nop:
//While this is literally a nop and there's in theory no point emitting anything for it, it could be used as a jump target.
//So we'll emit an ISIL nop for it.
// While this is literally a nop and there's in theory no point emitting anything for it, it could be used as a jump target.
// So we'll emit an ISIL nop for it.
builder.Nop(instruction.IP);
break;
default:
Expand Down
Loading
Loading