Skip to content

Commit 79f7546

Browse files
committed
AMDGPU: Fix counting kernel arguments towards register usage
Also use DataLayout to get the type size. Relying on the IR type size is still fairly broken here, since it won't perfectly capture how types are legalized.
1 parent 226beb4 commit 79f7546

File tree

2 files changed

+34
-10
lines changed

2 files changed

+34
-10
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1029,18 +1029,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
10291029
// Account for extra SGPRs and VGPRs reserved for debugger use.
10301030
ProgInfo.NumSGPR += ExtraSGPRs;
10311031

1032+
const Function &F = MF.getFunction();
1033+
10321034
// Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
10331035
// dispatch registers are function args.
10341036
unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
1035-
for (auto &Arg : MF.getFunction().args()) {
1036-
unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
1037-
if (Arg.hasAttribute(Attribute::InReg))
1038-
WaveDispatchNumSGPR += NumRegs;
1039-
else
1040-
WaveDispatchNumVGPR += NumRegs;
1037+
1038+
if (isShader(F.getCallingConv())) {
1039+
// FIXME: We should be using the number of registers determined during
1040+
// calling convention lowering to legalize the types.
1041+
const DataLayout &DL = F.getParent()->getDataLayout();
1042+
for (auto &Arg : F.args()) {
1043+
unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
1044+
if (Arg.hasAttribute(Attribute::InReg))
1045+
WaveDispatchNumSGPR += NumRegs;
1046+
else
1047+
WaveDispatchNumVGPR += NumRegs;
1048+
}
1049+
ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
1050+
ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
10411051
}
1042-
ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
1043-
ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
10441052

10451053
// Adjust number of registers used to meet default/requested minimum/maximum
10461054
// number of waves per execution unit request.

llvm/test/CodeGen/AMDGPU/code-object-v3.ll

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,9 @@
5050
; OSABI-AMDHSA-ELF: .rodata PROGBITS {{[0-9]+}} {{[0-9]+}} {{[0-9a-f]+}} {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
5151

5252
; OSABI-AMDHSA-ELF: Relocation section '.rela.rodata' at offset
53-
; OSABI-AMDHSA-ELF: 0000000000000010 0000000100000005 R_AMDGPU_REL64 0000000000000000 fadd + 10
54-
; OSABI-AMDHSA-ELF: 0000000000000050 0000000300000005 R_AMDGPU_REL64 0000000000000100 fsub + 10
53+
; OSABI-AMDHSA-ELF: 0000000000000010 0000000300000005 R_AMDGPU_REL64 0000000000000000 fadd + 10
54+
; OSABI-AMDHSA-ELF: 0000000000000050 0000000500000005 R_AMDGPU_REL64 0000000000000100 fsub + 10
55+
; OSABI-AMDHSA-ELF: 0000000000000090 0000000100000005 R_AMDGPU_REL64 0000000000000200 empty + 10
5556

5657
; OSABI-AMDHSA-ELF: Symbol table '.symtab' contains {{[0-9]+}} entries
5758
; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000000 {{[0-9]+}} FUNC GLOBAL PROTECTED {{[0-9]+}} fadd
@@ -85,3 +86,18 @@ entry:
8586
store float %r.val, float addrspace(1)* %r
8687
ret void
8788
}
89+
90+
; Make sure kernel arguments do not count towards the number of
91+
; registers used.
92+
;
93+
; ALL-ASM-LABEL: {{^}}empty:
94+
; ALL-ASM: .amdhsa_next_free_vgpr 1
95+
; ALL-ASM: .amdhsa_next_free_sgpr 1
96+
define amdgpu_kernel void @empty(
97+
i32 %i,
98+
float addrspace(1)* %r,
99+
float addrspace(1)* %a,
100+
float addrspace(1)* %b) {
101+
entry:
102+
ret void
103+
}

0 commit comments

Comments
 (0)