Skip to content

Commit 346c734

Browse files
Merge pull request KhronosGroup#2283 from KhronosGroup/pr-2281
Land PR 2281
2 parents 08391d9 + 855a5c3 commit 346c734

File tree

4 files changed

+117
-90
lines changed

4 files changed

+117
-90
lines changed

reference/opt/shaders-msl/comp/atomic-image.msl31.comp

Lines changed: 45 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -14,59 +14,60 @@ struct SSBO
1414

1515
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
1616

17-
kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d<uint, access::read_write> uImage [[texture(0)]], texture2d<int, access::read_write> iImage [[texture(1)]])
17+
kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d<uint, access::read_write> uImage [[texture(0)]], texture2d_array<uint, access::read_write> uImageArray [[texture(1)]], texture2d<int, access::read_write> iImage [[texture(2)]])
1818
{
1919
uint _19 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x;
20-
uint _27 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x;
21-
iImage.write(int4(int(_27)), uint2(int2(1, 6)));
22-
uint _32 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x;
23-
uint _34 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x;
24-
uint _36 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x;
25-
uint _38 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x;
26-
uint _40 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x;
27-
uint _44;
28-
uint4 _102;
20+
uint _27 = uImageArray.atomic_fetch_add(uint3(int3(1, 5, 8)).xy, uint3(int3(1, 5, 8)).z, 1u).x;
21+
uint _35 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x;
22+
iImage.write(int4(int(_35)), uint2(int2(1, 6)));
23+
uint _40 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x;
24+
uint _42 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x;
25+
uint _44 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x;
26+
uint _46 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x;
27+
uint _48 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x;
28+
uint _52;
29+
uint4 _110;
2930
do
3031
{
31-
_102.x = 10u;
32-
} while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_102, 2u) && _102.x == 10u);
33-
_44 = _102.x;
34-
int _47 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x;
35-
int _49 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x;
36-
int _51 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x;
37-
int _53 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x;
38-
int _55 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x;
39-
int _57 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x;
40-
int _61;
41-
int4 _104;
32+
_110.x = 10u;
33+
} while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_110, 2u) && _110.x == 10u);
34+
_52 = _110.x;
35+
int _55 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x;
36+
int _57 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x;
37+
int _59 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x;
38+
int _61 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x;
39+
int _63 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x;
40+
int _65 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x;
41+
int _69;
42+
int4 _112;
4243
do
4344
{
44-
_104.x = 10;
45-
} while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_104, 2) && _104.x == 10);
46-
_61 = _104.x;
47-
uint _68 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
48-
uint _70 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
49-
uint _72 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
50-
uint _74 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
51-
uint _76 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
52-
uint _78 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
53-
uint _80 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
54-
uint _82;
45+
_112.x = 10;
46+
} while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_112, 2) && _112.x == 10);
47+
_69 = _112.x;
48+
uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
49+
uint _78 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
50+
uint _80 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
51+
uint _82 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
52+
uint _84 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
53+
uint _86 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
54+
uint _88 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
55+
uint _90;
5556
do
5657
{
57-
_82 = 10u;
58-
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_82, 2u, memory_order_relaxed, memory_order_relaxed) && _82 == 10u);
59-
int _85 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
60-
int _87 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
61-
int _89 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
62-
int _91 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
63-
int _93 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
64-
int _95 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
65-
int _97 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
66-
int _99;
58+
_90 = 10u;
59+
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_90, 2u, memory_order_relaxed, memory_order_relaxed) && _90 == 10u);
60+
int _93 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
61+
int _95 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
62+
int _97 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
63+
int _99 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
64+
int _101 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
65+
int _103 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
66+
int _105 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
67+
int _107;
6768
do
6869
{
69-
_99 = 10;
70-
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_99, 2, memory_order_relaxed, memory_order_relaxed) && _99 == 10);
70+
_107 = 10;
71+
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_107, 2, memory_order_relaxed, memory_order_relaxed) && _107 == 10);
7172
}
7273

reference/shaders-msl/comp/atomic-image.msl31.comp

Lines changed: 45 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -14,59 +14,60 @@ struct SSBO
1414

1515
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
1616

17-
kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d<uint, access::read_write> uImage [[texture(0)]], texture2d<int, access::read_write> iImage [[texture(1)]])
17+
kernel void main0(device SSBO& ssbo [[buffer(0)]], texture2d<uint, access::read_write> uImage [[texture(0)]], texture2d_array<uint, access::read_write> uImageArray [[texture(1)]], texture2d<int, access::read_write> iImage [[texture(2)]])
1818
{
1919
uint _19 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x;
20-
uint _27 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x;
21-
iImage.write(int4(int(_27)), uint2(int2(1, 6)));
22-
uint _32 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x;
23-
uint _34 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x;
24-
uint _36 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x;
25-
uint _38 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x;
26-
uint _40 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x;
27-
uint _44;
28-
uint4 _102;
20+
uint _27 = uImageArray.atomic_fetch_add(uint3(int3(1, 5, 8)).xy, uint3(int3(1, 5, 8)).z, 1u).x;
21+
uint _35 = uImage.atomic_fetch_add(uint2(int2(1, 5)), 1u).x;
22+
iImage.write(int4(int(_35)), uint2(int2(1, 6)));
23+
uint _40 = uImage.atomic_fetch_or(uint2(int2(1, 5)), 1u).x;
24+
uint _42 = uImage.atomic_fetch_xor(uint2(int2(1, 5)), 1u).x;
25+
uint _44 = uImage.atomic_fetch_and(uint2(int2(1, 5)), 1u).x;
26+
uint _46 = uImage.atomic_fetch_min(uint2(int2(1, 5)), 1u).x;
27+
uint _48 = uImage.atomic_fetch_max(uint2(int2(1, 5)), 1u).x;
28+
uint _52;
29+
uint4 _110;
2930
do
3031
{
31-
_102.x = 10u;
32-
} while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_102, 2u) && _102.x == 10u);
33-
_44 = _102.x;
34-
int _47 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x;
35-
int _49 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x;
36-
int _51 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x;
37-
int _53 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x;
38-
int _55 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x;
39-
int _57 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x;
40-
int _61;
41-
int4 _104;
32+
_110.x = 10u;
33+
} while (!uImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_110, 2u) && _110.x == 10u);
34+
_52 = _110.x;
35+
int _55 = iImage.atomic_fetch_add(uint2(int2(1, 6)), 1).x;
36+
int _57 = iImage.atomic_fetch_or(uint2(int2(1, 6)), 1).x;
37+
int _59 = iImage.atomic_fetch_xor(uint2(int2(1, 6)), 1).x;
38+
int _61 = iImage.atomic_fetch_and(uint2(int2(1, 6)), 1).x;
39+
int _63 = iImage.atomic_fetch_min(uint2(int2(1, 6)), 1).x;
40+
int _65 = iImage.atomic_fetch_max(uint2(int2(1, 6)), 1).x;
41+
int _69;
42+
int4 _112;
4243
do
4344
{
44-
_104.x = 10;
45-
} while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_104, 2) && _104.x == 10);
46-
_61 = _104.x;
47-
uint _68 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
48-
uint _70 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
49-
uint _72 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
50-
uint _74 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
51-
uint _76 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
52-
uint _78 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
53-
uint _80 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
54-
uint _82;
45+
_112.x = 10;
46+
} while (!iImage.atomic_compare_exchange_weak(uint2(int2(1, 5)), &_112, 2) && _112.x == 10);
47+
_69 = _112.x;
48+
uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
49+
uint _78 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
50+
uint _80 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
51+
uint _82 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
52+
uint _84 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
53+
uint _86 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
54+
uint _88 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
55+
uint _90;
5556
do
5657
{
57-
_82 = 10u;
58-
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_82, 2u, memory_order_relaxed, memory_order_relaxed) && _82 == 10u);
59-
int _85 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
60-
int _87 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
61-
int _89 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
62-
int _91 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
63-
int _93 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
64-
int _95 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
65-
int _97 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
66-
int _99;
58+
_90 = 10u;
59+
} while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_90, 2u, memory_order_relaxed, memory_order_relaxed) && _90 == 10u);
60+
int _93 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
61+
int _95 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
62+
int _97 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
63+
int _99 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
64+
int _101 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
65+
int _103 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
66+
int _105 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
67+
int _107;
6768
do
6869
{
69-
_99 = 10;
70-
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_99, 2, memory_order_relaxed, memory_order_relaxed) && _99 == 10);
70+
_107 = 10;
71+
} while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_107, 2, memory_order_relaxed, memory_order_relaxed) && _107 == 10);
7172
}
7273

shaders-msl/comp/atomic-image.msl31.comp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ layout(local_size_x = 1) in;
44

55
layout(r32ui, binding = 0) uniform highp uimage2D uImage;
66
layout(r32i, binding = 1) uniform highp iimage2D iImage;
7-
layout(binding = 2, std430) buffer SSBO
7+
layout(r32ui, binding = 2) uniform highp uimage2DArray uImageArray;
8+
layout(binding = 3, std430) buffer SSBO
89
{
910
uint u32;
1011
int i32;
@@ -13,6 +14,7 @@ layout(binding = 2, std430) buffer SSBO
1314
void main()
1415
{
1516
imageAtomicAdd(uImage, ivec2(1, 5), 1u);
17+
imageAtomicAdd(uImageArray, ivec3(1, 5, 8), 1u);
1618

1719
// Test that we do not invalidate OpImage variables which are loaded from UniformConstant
1820
// address space.

spirv_msl.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10196,9 +10196,32 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
1019610196

1019710197
// Will only be false if we're in "force recompile later" mode.
1019810198
if (split_index != string::npos)
10199-
exp += join(obj_expression.substr(0, split_index), ".", op, "(", obj_expression.substr(split_index + 1));
10199+
{
10200+
auto coord = obj_expression.substr(split_index + 1);
10201+
exp += join(obj_expression.substr(0, split_index), ".", op, "(");
10202+
if (res_type.basetype == SPIRType::Image && res_type.image.arrayed)
10203+
{
10204+
switch (res_type.image.dim)
10205+
{
10206+
case Dim1D:
10207+
exp += join(coord, ".x, ", coord, ".y");
10208+
break;
10209+
case Dim2D:
10210+
exp += join(coord, ".xy, ", coord, ".z");
10211+
break;
10212+
default:
10213+
SPIRV_CROSS_THROW("Cannot do atomics on Cube textures.");
10214+
}
10215+
}
10216+
else
10217+
{
10218+
exp += coord;
10219+
}
10220+
}
1020010221
else
10222+
{
1020110223
exp += obj_expression;
10224+
}
1020210225
}
1020310226
else
1020410227
{

0 commit comments

Comments
 (0)