Skip to content

Commit f64d106

Browse files
committed
Update llpc from commit 879e8809
[CONTINUATIONS] Add metadata required by RGP [CONTINUATIONS] Run some optimization pass for gpurt module Add NoContraction decoration to have a test for fmul_legacy mapping Add readfirstlane on the result of subgroupClusterReduction on gfx11+ Add RobustGsEmits to GFX10 Add support for GS patch primitive type amdllpc: Add more helpful info with PipelineLib* dumps Avoid upgrade to seqcst ordering Debug Printf refactor Downgrade SequentiallyConsistent to AcquireRelease Expose getResourceMappingNodeTypeName() to the driver lgc: Improve TANH expansion to avoid overflow lgc: Refactor getShaderStageAbbreviation lgc: Use agent scope in more places llpcSpirvLowerGlobal: Fix originUpperLeft handling llpcSpirvLowerGlobal: Refactor input/output lowering llvmraytracing: Separate out header file for pointee type metadata Postpone descriptor load to ImageBuilder Promote llvm-dialects submodule Set last-use for load from the continuation stack Support for LogRayTracingPipelineSummary Update shader tests after LLVM update Fix a typo on paClVsOutCntl Fix primitive type for barycentric Fix the cooperativematrix issues(convert+muladd) on gfx1010 Fixes for lit tests on standalone amdllpc build
1 parent 6c770c7 commit f64d106

File tree

182 files changed

+2520
-1911
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

182 files changed

+2520
-1911
lines changed

compilerutils/include/compilerutils/CompilerUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ llvm::Function *cloneFunctionHeader(llvm::Function &f, llvm::FunctionType *newTy
7070
// Add an unreachable at the current position and remove the rest of the basic block.
7171
void createUnreachable(llvm::IRBuilder<> &b);
7272

73+
// Specifies that a load is the last use of the memory it loads.
74+
void setIsLastUseLoad(llvm::LoadInst &Load);
75+
7376
struct CrossModuleInlinerResult {
7477
llvm::Value *returnValue;
7578
llvm::iterator_range<llvm::Function::iterator> newBBs;

compilerutils/lib/CompilerUtils.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@
4141

4242
using namespace llvm;
4343

44+
// Whether this is a load instruction that should translate to a last_use
45+
// load.
46+
static constexpr const char *MDIsLastUseName = "amdgpu.last.use";
47+
4448
// =====================================================================================================================
4549
// Create an LLVM function call to the named function. The callee is built
4650
// automatically based on return type and its parameters.
@@ -150,6 +154,10 @@ void CompilerUtils::createUnreachable(llvm::IRBuilder<> &b) {
150154
DeleteDeadBlock(oldCode);
151155
}
152156

157+
void CompilerUtils::setIsLastUseLoad(llvm::LoadInst &Load) {
158+
Load.setMetadata(MDIsLastUseName, MDTuple::get(Load.getContext(), {}));
159+
}
160+
153161
namespace {
154162

155163
// Get the name of a global that is copied to a different module for inlining.

gfxruntime/src/shaders/AdvancedBlend.hlsl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949
float4 AmdExtFragCoord() DUMMY_FLOAT4_FUNC
5050
int AmdExtSampleId() DUMMY_INT_FUNC
5151

52-
float4 AmdAdvancedBlendTexelLoad(int4 imageLow, int4 imageHigh, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC
53-
float4 AmdAdvancedBlendTexelLoadFmask(int4 imageMsLow, int4 imageMsHigh, int4 fmaskLow, int4 fmaskHigh, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC
52+
float4 AmdAdvancedBlendTexelLoad(int64_t imageDesc, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC
53+
float4 AmdAdvancedBlendTexelLoadFmask(int64_t imageDesc, int64_t fmaskDesc, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC
5454

5555
float4 AmdAdvancedBlendCoherentTexelLoad(float4 color, int2 iCoord, int sampleId) DUMMY_FLOAT4_FUNC
5656
void AmdAdvancedBlendCoherentTexelStore(float4 color, int2 iCoord, int sampleId) DUMMY_VOID_FUNC
@@ -224,8 +224,8 @@ float AmdAdvancedBlendDivide(float dividend, float divisor) {
224224
}
225225
}
226226

227-
export float4 AmdAdvancedBlendInternal(float4 inColor, int4 imageMsLow, int4 imageMsHigh, int4 imageLow, int4 imageHigh,
228-
int4 fmaskLow, int4 fmaskHigh, int mode, bool isMsaa) {
227+
export float4 AmdAdvancedBlendInternal(float4 inColor, int64_t imageDescMs, int64_t imageDesc, int64_t fmaskDesc,
228+
int mode, bool isMsaa) {
229229
float4 srcColor = inColor;
230230
if (mode == 0) {
231231
return srcColor;
@@ -234,9 +234,9 @@ export float4 AmdAdvancedBlendInternal(float4 inColor, int4 imageMsLow, int4 ima
234234
int2 iCoord = int2(fragCoord.x, fragCoord.y);
235235
float4 dstColor;
236236
if (isMsaa) {
237-
dstColor = AmdAdvancedBlendTexelLoadFmask(imageMsLow, imageMsHigh, fmaskLow, fmaskHigh, iCoord, 0);
237+
dstColor = AmdAdvancedBlendTexelLoadFmask(imageDescMs, fmaskDesc, iCoord, 0);
238238
} else {
239-
dstColor = AmdAdvancedBlendTexelLoad(imageLow, imageHigh, iCoord, 0);
239+
dstColor = AmdAdvancedBlendTexelLoad(imageDesc, iCoord, 0);
240240
}
241241
// TODO: Uncomment them once ROV is supported in LLPC
242242
// int sampleId = AmdExtSampleId();

include/vkgcDefs.h

Lines changed: 62 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,7 @@ struct PipelineOptions {
472472
bool internalRtShaders; ///< Whether this pipeline has internal raytracing shaders
473473
unsigned forceNonUniformResourceIndexStageMask; ///< Mask of the stage to force using non-uniform resource index.
474474
bool reserved16;
475+
#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 73
475476
bool replaceSetWithResourceType; ///< For OGL only, replace 'set' with resource type during spirv translate
476477
bool disableSampleMask; ///< For OGL only, disabled if framebuffer doesn't attach multisample texture
477478
bool buildResourcesDataForShaderModule; ///< For OGL only, build resources usage data while building shader module
@@ -482,6 +483,25 @@ struct PipelineOptions {
482483
bool enableFragColor; ///< For OGL only, need to do frag color broadcast if it is enabled.
483484
bool disableBaseVertex; ///< For OGL only, force the BaseVertex builtin to 0 instead of
484485
/// loading it from userdata
486+
bool bindlessTextureMode; ///< For OGL only, true if bindless textures are used
487+
bool bindlessImageMode; ///< For OGL only, true if bindless images are used
488+
const auto &getGlState() const { return *this; }
489+
#else
490+
struct GLState {
491+
bool replaceSetWithResourceType; ///< For OGL only, replace 'set' with resource type during spirv translate
492+
bool disableSampleMask; ///< For OGL only, disabled if framebuffer doesn't attach multisample texture
493+
bool buildResourcesDataForShaderModule; ///< For OGL only, build resources usage data while building shader module
494+
bool disableTruncCoordForGather; ///< If set, trunc_coord of sampler srd is disabled for gather4
495+
bool enableCombinedTexture; ///< For OGL only, use the 'set' for DescriptorCombinedTexture
496+
///< for sampled images and samplers
497+
bool vertex64BitsAttribSingleLoc; ///< For OGL only, dvec3/dvec4 vertex attrib only consumes 1 location.
498+
bool enableFragColor; ///< For OGL only, need to do frag color broadcast if it is enabled.
499+
bool disableBaseVertex; ///< For OGL only, force the BaseVertex builtin to 0 instead of loading it from userdata
500+
bool bindlessTextureMode; ///< For OGL only, true if bindless textures are used
501+
bool bindlessImageMode; ///< For OGL only, true if bindless images are used
502+
} glState;
503+
const auto &getGlState() const { return glState; }
504+
#endif
485505
unsigned reserved20;
486506
bool enablePrimGeneratedQuery; ///< If set, primitive generated query is enabled
487507
bool disablePerCompFetch; ///< Disable per component fetch in uber fetch shader.
@@ -512,6 +532,7 @@ struct ResourceNodeData {
512532
unsigned isTexelFetchUsed; ///< TRUE if texelFetch is used
513533
unsigned isDefaultUniformSampler; ///< TRUE if it's sampler image in default uniform struct
514534
unsigned columnCount; ///< Column count if this is a matrix variable.
535+
unsigned componentCount; ///< Component count if this is a vector, row count if it is a matrix.
515536
BasicType basicType; ///< Type of the variable or element
516537
};
517538

@@ -545,6 +566,43 @@ struct ResourcesNodes {
545566
unsigned defaultUniformInfoCount;
546567
};
547568

569+
// raytracing system value usage flags
570+
union RayTracingSystemValueUsage {
571+
struct {
572+
union {
573+
struct {
574+
uint16_t flags : 1; // Shader calls gl_IncomingRayFlagsEXT
575+
uint16_t worldRayOrigin : 1; // Shader calls gl_WorldRayOriginEXT
576+
uint16_t tMin : 1; // Shader calls gl_RayTminEXT
577+
uint16_t worldRayDirection : 1; // Shader calls gl_WorldRayDirectionEXT
578+
uint16_t tCurrent : 1; // Shader calls gl_HitTEXT
579+
uint16_t launchId : 1; // Shader calls gl_LaunchIDEXT
580+
uint16_t launchSize : 1; // Shader calls gl_LaunchSizeEXT
581+
uint16_t reserved : 9; // Reserved
582+
};
583+
uint16_t u16All;
584+
} ray;
585+
586+
union {
587+
struct {
588+
uint16_t hitKind : 1; // Shader calls gl_HitKindEXT
589+
uint16_t instanceIndex : 1; // Shader calls gl_InstanceCustomIndexEXT
590+
uint16_t instanceID : 1; // Shader calls gl_InstanceID
591+
uint16_t primitiveIndex : 1; // Shader calls gl_PrimitiveID
592+
uint16_t geometryIndex : 1; // Shader calls gl_GeometryIndexEXT
593+
uint16_t objectToWorld : 1; // Shader calls gl_ObjectToWorldEXT
594+
uint16_t objectRayOrigin : 1; // Shader calls gl_ObjectRayOriginEXT
595+
uint16_t objectRayDirection : 1; // Shader calls gl_ObjectRayDirectionEXT
596+
uint16_t worldToObject : 1; // Shader calls gl_WorldToObjectEXT
597+
uint16_t hitTrianglePosition : 1; // Shader calls gl_HitTriangleVertexPositionsEXT
598+
uint16_t reserved : 6; // Reserved
599+
};
600+
uint16_t u16All;
601+
} primitive;
602+
};
603+
uint32_t u32All;
604+
};
605+
548606
/// Represents usage info of a shader module
549607
struct ShaderModuleUsage {
550608
bool enableVarPtrStorageBuf; ///< Whether to enable "VariablePointerStorageBuffer" capability
@@ -573,12 +631,14 @@ struct ShaderModuleUsage {
573631
bool pixelCenterInteger; ///< Whether pixel coord is Integer
574632
bool useGenericBuiltIn; ///< Whether to use builtIn inputs that include gl_PointCoord, gl_PrimitiveId,
575633
/// gl_Layer, gl_ClipDistance or gl_CullDistance.
634+
bool useBarycentric; ///< Whether to use gl_BarycentricXX or pervertexEXT decoration
576635
bool enableXfb; ///< Whether transform feedback is enabled
577636
unsigned localSizeX; ///< Compute shader work-group size in the X dimension
578637
unsigned localSizeY; ///< Compute shader work-group size in the Y dimension
579638
unsigned localSizeZ; ///< Compute shader work-group size in the Z dimension
580639
bool disableDualSource; ///< Whether disable dualSource blend
581640
uint32_t clipDistanceArraySize; ///< Count of output clip distance
641+
RayTracingSystemValueUsage rtSystemValueUsage; ///< Usage flags for ray tracing builtins
582642
};
583643

584644
/// Represents common part of shader module data
@@ -1001,43 +1061,6 @@ enum RayTracingRayFlag : unsigned {
10011061
};
10021062

10031063
// =====================================================================================================================
1004-
// raytracing system value usage flags
1005-
union RayTracingSystemValueUsage {
1006-
struct {
1007-
union {
1008-
struct {
1009-
uint16_t flags : 1; // Shader calls gl_IncomingRayFlagsEXT
1010-
uint16_t worldRayOrigin : 1; // Shader calls gl_WorldRayOriginEXT
1011-
uint16_t tMin : 1; // Shader calls gl_RayTminEXT
1012-
uint16_t worldRayDirection : 1; // Shader calls gl_WorldRayDirectionEXT
1013-
uint16_t tCurrent : 1; // Shader calls gl_HitTEXT
1014-
uint16_t launchId : 1; // Shader calls gl_LaunchIDEXT
1015-
uint16_t launchSize : 1; // Shader calls gl_LaunchSizeEXT
1016-
uint16_t reserved : 9; // Reserved
1017-
};
1018-
uint16_t u16All;
1019-
} ray;
1020-
1021-
union {
1022-
struct {
1023-
uint16_t hitKind : 1; // Shader calls gl_HitKindEXT
1024-
uint16_t instanceIndex : 1; // Shader calls gl_InstanceCustomIndexEXT
1025-
uint16_t instanceID : 1; // Shader calls gl_InstanceID
1026-
uint16_t primitiveIndex : 1; // Shader calls gl_PrimitiveID
1027-
uint16_t geometryIndex : 1; // Shader calls gl_GeometryIndexEXT
1028-
uint16_t objectToWorld : 1; // Shader calls gl_ObjectToWorldEXT
1029-
uint16_t objectRayOrigin : 1; // Shader calls gl_ObjectRayOriginEXT
1030-
uint16_t objectRayDirection : 1; // Shader calls gl_ObjectRayDirectionEXT
1031-
uint16_t worldToObject : 1; // Shader calls gl_WorldToObjectEXT
1032-
uint16_t hitTrianglePosition : 1; // Shader calls gl_HitTriangleVertexPositionsEXT
1033-
uint16_t reserved : 6; // Reserved
1034-
};
1035-
uint16_t u16All;
1036-
} primitive;
1037-
};
1038-
uint32_t u32All;
1039-
};
1040-
10411064
/// Represents ray-tracing shader export configuration
10421065
struct RayTracingShaderExportConfig {
10431066
unsigned indirectCallingConvention; ///< Indirect calling convention
@@ -1299,6 +1322,7 @@ struct GraphicsPipelineBuildInfo {
12991322
float pixelTransferBias[4]; ///< Bias apply to render color target
13001323
bool enableColorClampVs; ///< Enable clamp vertex output color
13011324
bool enableColorClampFs; ///< Enable clamp fragment output color
1325+
bool enableFlatShade; ///< Whether to enable flat shading.
13021326
} glState;
13031327
const auto &getGlState() const { return glState; }
13041328
#endif
@@ -1597,6 +1621,7 @@ class IUtil {
15971621
///
15981622
/// @param [in] spvBin SPIR-V binary
15991623
static const char *VKAPI_CALL GetEntryPointNameFromSpirvBinary(const BinaryData *spvBin);
1624+
static const char *VKAPI_CALL GetResourceMappingNodeTypeName(ResourceMappingNodeType type);
16001625
};
16011626

16021627
/// 128-bit hash compatible structure

lgc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ target_sources(LLVMlgc PRIVATE
191191
state/ShaderModes.cpp
192192
state/ShaderStage.cpp
193193
state/TargetInfo.cpp
194+
state/RuntimeContext.cpp
194195
)
195196

196197
# lgc/util

lgc/builder/ArithBuilder.cpp

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -507,29 +507,20 @@ Value *BuilderImpl::CreateCosh(Value *x, const Twine &instName) {
507507
// @param x : Input value X
508508
// @param instName : Name to give instruction(s)
509509
Value *BuilderImpl::CreateTanh(Value *x, const Twine &instName) {
510-
// sinh(x) / cosh(x)
511-
// (e^x - e^(-x))/(e^x + e^(-x))
510+
// tanh(x) = copysign(1-2/(e^-|2x|+1),x)
512511
// 1/log(2) = 1.442695
513-
// e^x = 2^(x*(1/log(2))) = 2^(x*1.442695))
514-
Value *divLog2 = CreateFMul(x, getRecipLog2(x->getType()));
515-
Value *negDivLog2 = CreateFSub(ConstantFP::get(x->getType(), 0.0), divLog2);
516-
Value *exp = CreateUnaryIntrinsic(Intrinsic::exp2, divLog2);
517-
Value *expNeg = CreateUnaryIntrinsic(Intrinsic::exp2, negDivLog2);
518-
Value *doubleSinh = CreateFSub(exp, expNeg);
519-
Value *doubleCosh = CreateFAdd(exp, expNeg);
520-
Value *result = fDivFast(doubleSinh, doubleCosh);
521-
522-
if (!getFastMathFlags().noInfs()) {
523-
// NOTE: If the fast math flags might have INFs, we should check the special case when the input is +INF or -INF.
524-
// According to the limit of tanh(x), we have following definitions:
525-
// / 1.0, when x -> +INF
526-
// lim(tanh(x)) =
527-
// \ -1.0, when x -> -INF
528-
Value *one = ConstantFP::get(x->getType(), 1.0);
529-
Value *isInf = CreateIsInf(x);
530-
result = CreateSelect(isInf, CreateCopySign(one, x), result);
531-
}
532-
512+
// e = 2^(1/log(2))
513+
// e^-|2x| = 2^(-|2x|*(1/log(2)))
514+
auto vTy = x->getType();
515+
Value *result = CreateIntrinsic(Intrinsic::fabs, vTy, x);
516+
result = CreateFNeg(result);
517+
result = CreateFMul(ConstantFP::get(vTy, 2.0), result);
518+
result = CreateFMul(getRecipLog2(vTy), result);
519+
result = CreateUnaryIntrinsic(Intrinsic::exp2, result);
520+
result = CreateFAdd(ConstantFP::get(vTy, 1.0), result);
521+
result = fDivFast(ConstantFP::get(vTy, 2.0), result);
522+
result = CreateFSub(ConstantFP::get(vTy, 1.0), result);
523+
result = CreateCopySign(result, x);
533524
result->setName(instName);
534525
return result;
535526
}

lgc/builder/BuilderImpl.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -249,22 +249,20 @@ Value *BuilderImpl::CreateIntegerDotProduct(Value *vector1, Value *vector2, Valu
249249

250250
// =====================================================================================================================
251251
// Get whether the context we are building in supports ds_bpermute or v_bpermute across all lanes in the wave
252-
bool BuilderImpl::supportWaveWideBPermute() const {
252+
//
253+
// @param shaderStage : shader stage enum.
254+
bool BuilderImpl::supportWaveWideBPermute(ShaderStageEnum shaderStage) const {
253255
auto gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion().major;
254256
auto supportBPermute = gfxIp == 8 || gfxIp == 9;
255-
auto shaderStage = getShaderStage(GetInsertBlock()->getParent());
256-
auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage.value());
257+
auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage);
257258
supportBPermute = supportBPermute || waveSize == 32;
258259
return supportBPermute;
259260
}
260261

261262
// =====================================================================================================================
262263
// Get whether the context we are building in supports permute lane 64 DPP operations.
263264
bool BuilderImpl::supportPermLane64Dpp() const {
264-
auto gfxip = getPipelineState()->getTargetInfo().getGfxIpVersion().major;
265-
auto shaderStage = getShaderStage(GetInsertBlock()->getParent());
266-
auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage.value());
267-
return gfxip >= 11 && waveSize == 64;
265+
return getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 11;
268266
}
269267

270268
// =====================================================================================================================

lgc/builder/DescBuilder.cpp

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -394,45 +394,47 @@ Value *BuilderImpl::buildBufferCompactDesc(Value *desc, unsigned stride) {
394394
Value *descElem1 = CreateExtractElement(desc, 1);
395395

396396
// Build normal buffer descriptor
397-
// Dword 0
398397
Value *bufDesc = PoisonValue::get(FixedVectorType::get(getInt32Ty(), 4));
399-
bufDesc = CreateInsertElement(bufDesc, descElem0, uint64_t(0));
400-
401-
// Dword 1
402-
SqBufRsrcWord1 sqBufRsrcWord1 = {};
403-
sqBufRsrcWord1.bits.baseAddressHi = UINT16_MAX;
404-
descElem1 = CreateAnd(descElem1, getInt32(sqBufRsrcWord1.u32All));
405-
if (stride) {
406-
SqBufRsrcWord1 sqBufRsrcWord1Stride = {};
407-
sqBufRsrcWord1Stride.bits.stride = stride;
408-
descElem1 = CreateOr(descElem1, getInt32(sqBufRsrcWord1Stride.u32All));
409-
}
410-
bufDesc = CreateInsertElement(bufDesc, descElem1, 1);
411-
412-
// Dword 2
413-
SqBufRsrcWord2 sqBufRsrcWord2 = {};
414-
sqBufRsrcWord2.bits.numRecords = UINT32_MAX;
415-
bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord2.u32All), 2);
416-
417-
// Dword 3
418-
SqBufRsrcWord3 sqBufRsrcWord3 = {};
419-
sqBufRsrcWord3.bits.dstSelX = BUF_DST_SEL_X;
420-
sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y;
421-
sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z;
422-
sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W;
423-
if (gfxIp.major == 10) {
424-
sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT;
425-
sqBufRsrcWord3.gfx10.resourceLevel = 1;
426-
sqBufRsrcWord3.gfx10.oobSelect = stride ? 3 : 2;
427-
assert(sqBufRsrcWord3.u32All == 0x21014FAC || sqBufRsrcWord3.u32All == 0x31014FAC);
428-
} else if (gfxIp.major >= 11) {
429-
sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT;
430-
sqBufRsrcWord3.gfx11.oobSelect = stride ? 3 : 2;
431-
assert(sqBufRsrcWord3.u32All == 0x20014FAC || sqBufRsrcWord3.u32All == 0x30014FAC);
432-
} else {
433-
llvm_unreachable("Not implemented!");
398+
{
399+
// Dword 0
400+
bufDesc = CreateInsertElement(bufDesc, descElem0, uint64_t(0));
401+
402+
// Dword 1
403+
SqBufRsrcWord1 sqBufRsrcWord1 = {};
404+
sqBufRsrcWord1.bits.baseAddressHi = UINT16_MAX;
405+
descElem1 = CreateAnd(descElem1, getInt32(sqBufRsrcWord1.u32All));
406+
if (stride) {
407+
SqBufRsrcWord1 sqBufRsrcWord1Stride = {};
408+
sqBufRsrcWord1Stride.bits.stride = stride;
409+
descElem1 = CreateOr(descElem1, getInt32(sqBufRsrcWord1Stride.u32All));
410+
}
411+
bufDesc = CreateInsertElement(bufDesc, descElem1, 1);
412+
413+
// Dword 2
414+
SqBufRsrcWord2 sqBufRsrcWord2 = {};
415+
sqBufRsrcWord2.bits.numRecords = UINT32_MAX;
416+
bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord2.u32All), 2);
417+
418+
// Dword 3
419+
SqBufRsrcWord3 sqBufRsrcWord3 = {};
420+
sqBufRsrcWord3.bits.dstSelX = BUF_DST_SEL_X;
421+
sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y;
422+
sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z;
423+
sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W;
424+
if (gfxIp.major == 10) {
425+
sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT;
426+
sqBufRsrcWord3.gfx10.resourceLevel = 1;
427+
sqBufRsrcWord3.gfx10.oobSelect = stride ? 3 : 2;
428+
assert(sqBufRsrcWord3.u32All == 0x21014FAC || sqBufRsrcWord3.u32All == 0x31014FAC);
429+
} else if (gfxIp.major >= 11) {
430+
sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT;
431+
sqBufRsrcWord3.gfx11.oobSelect = stride ? 3 : 2;
432+
assert(sqBufRsrcWord3.u32All == 0x20014FAC || sqBufRsrcWord3.u32All == 0x30014FAC);
433+
} else {
434+
llvm_unreachable("Not implemented!");
435+
}
436+
bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord3.u32All), 3);
434437
}
435-
bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord3.u32All), 3);
436438

437439
return bufDesc;
438440
}

0 commit comments

Comments
 (0)