Commit 7b7168d0 by John Kessenich

Merge branch 'GL_KHR_shader_subgroup' into 'master'

Add shader subgroup support. See merge request !18
parents 79aed3b7 445e3899
......@@ -246,7 +246,7 @@ namespace spv {
spv::Id spirvbin_t::localId(spv::Id id, spv::Id newId)
{
assert(id != spv::NoResult && newId != spv::NoResult);
//assert(id != spv::NoResult && newId != spv::NoResult);
if (id >= idMapL.size())
idMapL.resize(id+1, unused);
......
......@@ -46,12 +46,12 @@ namespace spv {
typedef unsigned int Id;
#define SPV_VERSION 0x10000
#define SPV_REVISION 10
#define SPV_VERSION 0x10200
#define SPV_REVISION 1
static const unsigned int MagicNumber = 0x07230203;
static const unsigned int Version = 0x00010000;
static const unsigned int Revision = 10;
static const unsigned int Version = 0x00010200;
static const unsigned int Revision = 1;
static const unsigned int OpCodeMask = 0xffff;
static const unsigned int WordCountShift = 16;
......@@ -122,6 +122,15 @@ enum ExecutionMode {
ExecutionModeOutputTriangleStrip = 29,
ExecutionModeVecTypeHint = 30,
ExecutionModeContractionOff = 31,
ExecutionModeInitializer = 33,
ExecutionModeFinalizer = 34,
ExecutionModeSubgroupSize = 35,
ExecutionModeSubgroupsPerWorkgroup = 36,
ExecutionModeSubgroupsPerWorkgroupId = 37,
ExecutionModeLocalSizeId = 38,
ExecutionModeLocalSizeHintId = 39,
ExecutionModeInvocationsId = 40,
ExecutionModeOutputVerticesId = 41,
ExecutionModeMax = 0x7fffffff,
};
......@@ -376,10 +385,23 @@ enum Decoration {
DecorationNoContraction = 42,
DecorationInputAttachmentIndex = 43,
DecorationAlignment = 44,
DecorationMaxByteOffset = 45,
DecorationAlignmentId = 46,
DecorationMaxByteOffsetId = 47,
DecorationArrayStrideId = 48,
DecorationMatrixStrideId = 49,
DecorationStreamId = 50,
DecorationLocationId = 51,
DecorationIndexId = 52,
DecorationBindingId = 53,
DecorationDescriptorSetId = 54,
DecorationOffsetId = 55,
DecorationInputAttachmentIndexId = 56,
DecorationOverrideCoverageNV = 5248,
DecorationPassthroughNV = 5250,
DecorationViewportRelativeNV = 5252,
DecorationSecondaryViewportRelativeNV = 5256,
DecorationSecondaryViewportRelativeIdNV = 5263,
DecorationMax = 0x7fffffff,
};
......@@ -425,10 +447,15 @@ enum BuiltIn {
BuiltInSubgroupLocalInvocationId = 41,
BuiltInVertexIndex = 42,
BuiltInInstanceIndex = 43,
BuiltInSubgroupEqMask = 4416,
BuiltInSubgroupEqMaskKHR = 4416,
BuiltInSubgroupGeMask = 4417,
BuiltInSubgroupGeMaskKHR = 4417,
BuiltInSubgroupGtMask = 4418,
BuiltInSubgroupGtMaskKHR = 4418,
BuiltInSubgroupLeMask = 4419,
BuiltInSubgroupLeMaskKHR = 4419,
BuiltInSubgroupLtMask = 4420,
BuiltInSubgroupLtMaskKHR = 4420,
BuiltInBaseVertex = 4424,
BuiltInBaseInstance = 4425,
......@@ -458,6 +485,8 @@ enum SelectionControlMask {
enum LoopControlShift {
LoopControlUnrollShift = 0,
LoopControlDontUnrollShift = 1,
LoopControlDependencyInfiniteShift = 2,
LoopControlDependencyLengthShift = 3,
LoopControlMax = 0x7fffffff,
};
......@@ -465,6 +494,8 @@ enum LoopControlMask {
LoopControlMaskNone = 0,
LoopControlUnrollMask = 0x00000001,
LoopControlDontUnrollMask = 0x00000002,
LoopControlDependencyInfiniteMask = 0x00000004,
LoopControlDependencyLengthMask = 0x00000008,
};
enum FunctionControlShift {
......@@ -538,6 +569,7 @@ enum GroupOperation {
GroupOperationReduce = 0,
GroupOperationInclusiveScan = 1,
GroupOperationExclusiveScan = 2,
GroupOperationClusteredReduce = 3,
GroupOperationMax = 0x7fffffff,
};
......@@ -615,6 +647,17 @@ enum Capability {
CapabilityStorageImageReadWithoutFormat = 55,
CapabilityStorageImageWriteWithoutFormat = 56,
CapabilityMultiViewport = 57,
CapabilitySubgroupDispatch = 58,
CapabilityNamedBarrier = 59,
CapabilityPipeStorage = 60,
CapabilityGroupNonUniform = 61,
CapabilityGroupNonUniformVote = 62,
CapabilityGroupNonUniformArithmetic = 63,
CapabilityGroupNonUniformBallot = 64,
CapabilityGroupNonUniformShuffle = 65,
CapabilityGroupNonUniformShuffleRelative = 66,
CapabilityGroupNonUniformClustered = 67,
CapabilityGroupNonUniformQuad = 68,
CapabilitySubgroupBallotKHR = 4423,
CapabilityDrawParameters = 4427,
CapabilitySubgroupVoteKHR = 4431,
......@@ -932,6 +975,52 @@ enum Op {
OpAtomicFlagTestAndSet = 318,
OpAtomicFlagClear = 319,
OpImageSparseRead = 320,
OpSizeOf = 321,
OpTypePipeStorage = 322,
OpConstantPipeStorage = 323,
OpCreatePipeFromPipeStorage = 324,
OpGetKernelLocalSizeForSubgroupCount = 325,
OpGetKernelMaxNumSubgroups = 326,
OpTypeNamedBarrier = 327,
OpNamedBarrierInitialize = 328,
OpMemoryNamedBarrier = 329,
OpModuleProcessed = 330,
OpExecutionModeId = 331,
OpDecorateId = 332,
OpGroupNonUniformElect = 333,
OpGroupNonUniformAll = 334,
OpGroupNonUniformAny = 335,
OpGroupNonUniformAllEqual = 336,
OpGroupNonUniformBroadcast = 337,
OpGroupNonUniformBroadcastFirst = 338,
OpGroupNonUniformBallot = 339,
OpGroupNonUniformInverseBallot = 340,
OpGroupNonUniformBallotBitExtract = 341,
OpGroupNonUniformBallotBitCount = 342,
OpGroupNonUniformBallotFindLSB = 343,
OpGroupNonUniformBallotFindMSB = 344,
OpGroupNonUniformShuffle = 345,
OpGroupNonUniformShuffleXor = 346,
OpGroupNonUniformShuffleUp = 347,
OpGroupNonUniformShuffleDown = 348,
OpGroupNonUniformIAdd = 349,
OpGroupNonUniformFAdd = 350,
OpGroupNonUniformIMul = 351,
OpGroupNonUniformFMul = 352,
OpGroupNonUniformSMin = 353,
OpGroupNonUniformUMin = 354,
OpGroupNonUniformFMin = 355,
OpGroupNonUniformSMax = 356,
OpGroupNonUniformUMax = 357,
OpGroupNonUniformFMax = 358,
OpGroupNonUniformBitwiseAnd = 359,
OpGroupNonUniformBitwiseOr = 360,
OpGroupNonUniformBitwiseXor = 361,
OpGroupNonUniformLogicalAnd = 362,
OpGroupNonUniformLogicalOr = 363,
OpGroupNonUniformLogicalXor = 364,
OpGroupNonUniformQuadBroadcast = 365,
OpGroupNonUniformQuadSwap = 366,
OpSubgroupBallotKHR = 4421,
OpSubgroupFirstInvocationKHR = 4422,
OpSubgroupAllKHR = 4428,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
spv.subgroupBasic.comp
Warning, version 450 is not yet complete; most version-specific features are present, but some are missing.
// Module Version 10200
// Generated by (magic number): 80001
// Id's are bound by 41
Capability Shader
Capability GroupNonUniform
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint GLCompute 4 "main" 14 19 22 25
ExecutionMode 4 LocalSize 8 8 1
Source GLSL 450
SourceExtension "GL_KHR_shader_subgroup_basic"
Name 4 "main"
Name 8 "Buffer"
MemberName 8(Buffer) 0 "a"
Name 10 "data"
Name 14 "gl_SubgroupSize"
Name 19 "gl_SubgroupInvocationID"
Name 22 "gl_NumSubgroups"
Name 25 "gl_SubgroupID"
Decorate 7 ArrayStride 4
MemberDecorate 8(Buffer) 0 Offset 0
Decorate 8(Buffer) BufferBlock
Decorate 10(data) DescriptorSet 0
Decorate 10(data) Binding 0
Decorate 14(gl_SubgroupSize) RelaxedPrecision
Decorate 14(gl_SubgroupSize) BuiltIn SubgroupSize
Decorate 15 RelaxedPrecision
Decorate 19(gl_SubgroupInvocationID) RelaxedPrecision
Decorate 19(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
Decorate 20 RelaxedPrecision
Decorate 22(gl_NumSubgroups) BuiltIn NumSubgroups
Decorate 25(gl_SubgroupID) BuiltIn SubgroupId
Decorate 40 BuiltIn WorkgroupSize
2: TypeVoid
3: TypeFunction 2
6: TypeInt 32 1
7: TypeRuntimeArray 6(int)
8(Buffer): TypeStruct 7
9: TypePointer Uniform 8(Buffer)
10(data): 9(ptr) Variable Uniform
11: 6(int) Constant 0
12: TypeInt 32 0
13: TypePointer Input 12(int)
14(gl_SubgroupSize): 13(ptr) Variable Input
16: 6(int) Constant 1
17: TypePointer Uniform 6(int)
19(gl_SubgroupInvocationID): 13(ptr) Variable Input
22(gl_NumSubgroups): 13(ptr) Variable Input
25(gl_SubgroupID): 13(ptr) Variable Input
27: TypeBool
28: 12(int) Constant 3
32: 12(int) Constant 1
33: 12(int) Constant 0
34: 12(int) Constant 4048
35: 12(int) Constant 64
36: 12(int) Constant 256
37: 12(int) Constant 2048
38: TypeVector 12(int) 3
39: 12(int) Constant 8
40: 38(ivec3) ConstantComposite 39 39 32
4(main): 2 Function None 3
5: Label
15: 12(int) Load 14(gl_SubgroupSize)
18: 17(ptr) AccessChain 10(data) 11 15
Store 18 16
20: 12(int) Load 19(gl_SubgroupInvocationID)
21: 17(ptr) AccessChain 10(data) 11 20
Store 21 16
23: 12(int) Load 22(gl_NumSubgroups)
24: 17(ptr) AccessChain 10(data) 11 23
Store 24 16
26: 12(int) Load 25(gl_SubgroupID)
29: 27(bool) GroupNonUniformElect 28
30: 6(int) Select 29 16 11
31: 17(ptr) AccessChain 10(data) 11 26
Store 31 30
ControlBarrier 28 32 33
MemoryBarrier 28 34
MemoryBarrier 28 35
MemoryBarrier 28 36
MemoryBarrier 28 37
Return
FunctionEnd
#version 450
// Compute shader that enables GL_KHR_shader_subgroup_ballot and calls the
// subgroup ballot and broadcast built-ins on float, int, uint, double and
// bool operands of one to four components.
#extension GL_KHR_shader_subgroup_ballot: enable
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// Array of four buffer blocks; each block holds one 4-component vector per
// base type used below.
layout(binding = 0) buffer Buffers
{
vec4 f4;
ivec4 i4;
uvec4 u4;
dvec4 d4;
} data[4];
void main()
{
// The modulo keeps the index in [0, 3], i.e. inside data[4].
uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
// Sum of the five subgroup relational-mask built-ins.
uvec4 relMask = gl_SubgroupEqMask +
gl_SubgroupGeMask +
gl_SubgroupGtMask +
gl_SubgroupLeMask +
gl_SubgroupLtMask;
// Ballot with a constant-true predicate; the result feeds the bit queries below.
uvec4 result = subgroupBallot(true);
// Bit-count, bit-extract, inclusive/exclusive bit-count and LSB/MSB queries,
// one stored value per component of u4.
data[invocation].u4.x = subgroupBallotBitCount(result);
data[invocation].u4.y = subgroupBallotBitExtract(result, 0) ? 1 : 0;
data[invocation].u4.z = subgroupBallotInclusiveBitCount(result) + subgroupBallotExclusiveBitCount(result);
data[invocation].u4.w = subgroupBallotFindLSB(result) + subgroupBallotFindMSB(result);
if ((relMask == result) && subgroupInverseBallot(data[0].u4))
{
// subgroupBroadcast with float, int, uint and double operands; data[0]..data[3]
// supply the 1-, 2-, 3- and 4-component cases respectively.
data[invocation].f4.x = subgroupBroadcast(data[0].f4.x, invocation);
data[invocation].f4.xy = subgroupBroadcast(data[1].f4.xy, invocation);
data[invocation].f4.xyz = subgroupBroadcast(data[2].f4.xyz, invocation);
data[invocation].f4 = subgroupBroadcast(data[3].f4, invocation);
data[invocation].i4.x = subgroupBroadcast(data[0].i4.x, invocation);
data[invocation].i4.xy = subgroupBroadcast(data[1].i4.xy, invocation);
data[invocation].i4.xyz = subgroupBroadcast(data[2].i4.xyz, invocation);
data[invocation].i4 = subgroupBroadcast(data[3].i4, invocation);
data[invocation].u4.x = subgroupBroadcast(data[0].u4.x, invocation);
data[invocation].u4.xy = subgroupBroadcast(data[1].u4.xy, invocation);
data[invocation].u4.xyz = subgroupBroadcast(data[2].u4.xyz, invocation);
data[invocation].u4 = subgroupBroadcast(data[3].u4, invocation);
data[invocation].d4.x = subgroupBroadcast(data[0].d4.x, invocation);
data[invocation].d4.xy = subgroupBroadcast(data[1].d4.xy, invocation);
data[invocation].d4.xyz = subgroupBroadcast(data[2].d4.xyz, invocation);
data[invocation].d4 = subgroupBroadcast(data[3].d4, invocation);
// Boolean operands built from "< 0" comparisons; the results are converted
// to int types for storage. The vector cases all read data[1].
data[invocation].i4.x = int(subgroupBroadcast(data[0].i4.x < 0, invocation));
data[invocation].i4.xy = ivec2(subgroupBroadcast(lessThan(data[1].i4.xy, ivec2(0)), invocation));
data[invocation].i4.xyz = ivec3(subgroupBroadcast(lessThan(data[1].i4.xyz, ivec3(0)), invocation));
data[invocation].i4 = ivec4(subgroupBroadcast(lessThan(data[1].i4, ivec4(0)), invocation));
}
else
{
// Same operand matrix as the branch above, using subgroupBroadcastFirst,
// which takes no index argument.
data[invocation].f4.x = subgroupBroadcastFirst(data[0].f4.x);
data[invocation].f4.xy = subgroupBroadcastFirst(data[1].f4.xy);
data[invocation].f4.xyz = subgroupBroadcastFirst(data[2].f4.xyz);
data[invocation].f4 = subgroupBroadcastFirst(data[3].f4);
data[invocation].i4.x = subgroupBroadcastFirst(data[0].i4.x);
data[invocation].i4.xy = subgroupBroadcastFirst(data[1].i4.xy);
data[invocation].i4.xyz = subgroupBroadcastFirst(data[2].i4.xyz);
data[invocation].i4 = subgroupBroadcastFirst(data[3].i4);
data[invocation].u4.x = subgroupBroadcastFirst(data[0].u4.x);
data[invocation].u4.xy = subgroupBroadcastFirst(data[1].u4.xy);
data[invocation].u4.xyz = subgroupBroadcastFirst(data[2].u4.xyz);
data[invocation].u4 = subgroupBroadcastFirst(data[3].u4);
data[invocation].d4.x = subgroupBroadcastFirst(data[0].d4.x);
data[invocation].d4.xy = subgroupBroadcastFirst(data[1].d4.xy);
data[invocation].d4.xyz = subgroupBroadcastFirst(data[2].d4.xyz);
data[invocation].d4 = subgroupBroadcastFirst(data[3].d4);
// Boolean operands, converted to int types for storage.
data[invocation].i4.x = int(subgroupBroadcastFirst(data[0].i4.x < 0));
data[invocation].i4.xy = ivec2(subgroupBroadcastFirst(lessThan(data[1].i4.xy, ivec2(0))));
data[invocation].i4.xyz = ivec3(subgroupBroadcastFirst(lessThan(data[1].i4.xyz, ivec3(0))));
data[invocation].i4 = ivec4(subgroupBroadcastFirst(lessThan(data[1].i4, ivec4(0))));
}
}
#version 450
// Compute shader that enables GL_KHR_shader_subgroup_basic, reads the four
// subgroup built-in variables, and calls subgroupElect plus every subgroup
// barrier function once.
#extension GL_KHR_shader_subgroup_basic: enable
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// Single buffer block wrapping an unsized int array.
layout(binding = 0) buffer Buffer
{
int a[];
} data;
void main()
{
// Each built-in is used directly as an index into the unsized array.
data.a[gl_SubgroupSize] = 1;
data.a[gl_SubgroupInvocationID] = 1;
data.a[gl_NumSubgroups] = 1;
// Stores 1 when subgroupElect() is true, otherwise 0.
data.a[gl_SubgroupID] = (subgroupElect()) ? 1 : 0;
// One call to each barrier / memory-barrier variant; none takes arguments.
subgroupBarrier();
subgroupMemoryBarrier();
subgroupMemoryBarrierBuffer();
subgroupMemoryBarrierShared();
subgroupMemoryBarrierImage();
}
#version 450
// Compute shader that enables GL_KHR_shader_subgroup_clustered and calls each
// subgroupClustered* built-in on operands of one to four components.
// Every call passes the literal 1 as its second argument.
#extension GL_KHR_shader_subgroup_clustered: enable
layout (local_size_x = 8) in;
// Array of four buffer blocks; each block holds one 4-component vector per
// base type used below.
layout(binding = 0) buffer Buffers
{
vec4 f4;
ivec4 i4;
uvec4 u4;
dvec4 d4;
} data[4];
void main()
{
// The modulo keeps the index in [0, 3], i.e. inside data[4].
uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
// subgroupClusteredAdd: float, int, uint, double; data[0]..data[3] supply the
// 1-, 2-, 3- and 4-component cases.
data[invocation].f4.x = subgroupClusteredAdd(data[0].f4.x, 1);
data[invocation].f4.xy = subgroupClusteredAdd(data[1].f4.xy, 1);
data[invocation].f4.xyz = subgroupClusteredAdd(data[2].f4.xyz, 1);
data[invocation].f4 = subgroupClusteredAdd(data[3].f4, 1);
data[invocation].i4.x = subgroupClusteredAdd(data[0].i4.x, 1);
data[invocation].i4.xy = subgroupClusteredAdd(data[1].i4.xy, 1);
data[invocation].i4.xyz = subgroupClusteredAdd(data[2].i4.xyz, 1);
data[invocation].i4 = subgroupClusteredAdd(data[3].i4, 1);
data[invocation].u4.x = subgroupClusteredAdd(data[0].u4.x, 1);
data[invocation].u4.xy = subgroupClusteredAdd(data[1].u4.xy, 1);
data[invocation].u4.xyz = subgroupClusteredAdd(data[2].u4.xyz, 1);
data[invocation].u4 = subgroupClusteredAdd(data[3].u4, 1);
data[invocation].d4.x = subgroupClusteredAdd(data[0].d4.x, 1);
data[invocation].d4.xy = subgroupClusteredAdd(data[1].d4.xy, 1);
data[invocation].d4.xyz = subgroupClusteredAdd(data[2].d4.xyz, 1);
data[invocation].d4 = subgroupClusteredAdd(data[3].d4, 1);
// subgroupClusteredMul: same operand matrix.
data[invocation].f4.x = subgroupClusteredMul(data[0].f4.x, 1);
data[invocation].f4.xy = subgroupClusteredMul(data[1].f4.xy, 1);
data[invocation].f4.xyz = subgroupClusteredMul(data[2].f4.xyz, 1);
data[invocation].f4 = subgroupClusteredMul(data[3].f4, 1);
data[invocation].i4.x = subgroupClusteredMul(data[0].i4.x, 1);
data[invocation].i4.xy = subgroupClusteredMul(data[1].i4.xy, 1);
data[invocation].i4.xyz = subgroupClusteredMul(data[2].i4.xyz, 1);
data[invocation].i4 = subgroupClusteredMul(data[3].i4, 1);
data[invocation].u4.x = subgroupClusteredMul(data[0].u4.x, 1);
data[invocation].u4.xy = subgroupClusteredMul(data[1].u4.xy, 1);
data[invocation].u4.xyz = subgroupClusteredMul(data[2].u4.xyz, 1);
data[invocation].u4 = subgroupClusteredMul(data[3].u4, 1);
data[invocation].d4.x = subgroupClusteredMul(data[0].d4.x, 1);
data[invocation].d4.xy = subgroupClusteredMul(data[1].d4.xy, 1);
data[invocation].d4.xyz = subgroupClusteredMul(data[2].d4.xyz, 1);
data[invocation].d4 = subgroupClusteredMul(data[3].d4, 1);
// subgroupClusteredMin: same operand matrix.
data[invocation].f4.x = subgroupClusteredMin(data[0].f4.x, 1);
data[invocation].f4.xy = subgroupClusteredMin(data[1].f4.xy, 1);
data[invocation].f4.xyz = subgroupClusteredMin(data[2].f4.xyz, 1);
data[invocation].f4 = subgroupClusteredMin(data[3].f4, 1);
data[invocation].i4.x = subgroupClusteredMin(data[0].i4.x, 1);
data[invocation].i4.xy = subgroupClusteredMin(data[1].i4.xy, 1);
data[invocation].i4.xyz = subgroupClusteredMin(data[2].i4.xyz, 1);
data[invocation].i4 = subgroupClusteredMin(data[3].i4, 1);
data[invocation].u4.x = subgroupClusteredMin(data[0].u4.x, 1);
data[invocation].u4.xy = subgroupClusteredMin(data[1].u4.xy, 1);
data[invocation].u4.xyz = subgroupClusteredMin(data[2].u4.xyz, 1);
data[invocation].u4 = subgroupClusteredMin(data[3].u4, 1);
data[invocation].d4.x = subgroupClusteredMin(data[0].d4.x, 1);
data[invocation].d4.xy = subgroupClusteredMin(data[1].d4.xy, 1);
data[invocation].d4.xyz = subgroupClusteredMin(data[2].d4.xyz, 1);
data[invocation].d4 = subgroupClusteredMin(data[3].d4, 1);
// subgroupClusteredMax: same operand matrix.
data[invocation].f4.x = subgroupClusteredMax(data[0].f4.x, 1);
data[invocation].f4.xy = subgroupClusteredMax(data[1].f4.xy, 1);
data[invocation].f4.xyz = subgroupClusteredMax(data[2].f4.xyz, 1);
data[invocation].f4 = subgroupClusteredMax(data[3].f4, 1);
data[invocation].i4.x = subgroupClusteredMax(data[0].i4.x, 1);
data[invocation].i4.xy = subgroupClusteredMax(data[1].i4.xy, 1);
data[invocation].i4.xyz = subgroupClusteredMax(data[2].i4.xyz, 1);
data[invocation].i4 = subgroupClusteredMax(data[3].i4, 1);
data[invocation].u4.x = subgroupClusteredMax(data[0].u4.x, 1);
data[invocation].u4.xy = subgroupClusteredMax(data[1].u4.xy, 1);
data[invocation].u4.xyz = subgroupClusteredMax(data[2].u4.xyz, 1);
data[invocation].u4 = subgroupClusteredMax(data[3].u4, 1);
data[invocation].d4.x = subgroupClusteredMax(data[0].d4.x, 1);
data[invocation].d4.xy = subgroupClusteredMax(data[1].d4.xy, 1);
data[invocation].d4.xyz = subgroupClusteredMax(data[2].d4.xyz, 1);
data[invocation].d4 = subgroupClusteredMax(data[3].d4, 1);
// subgroupClusteredAnd: int and uint operands, then boolean operands built
// from "< 0" comparisons (the boolean vector cases all read data[1]).
data[invocation].i4.x = subgroupClusteredAnd(data[0].i4.x, 1);
data[invocation].i4.xy = subgroupClusteredAnd(data[1].i4.xy, 1);
data[invocation].i4.xyz = subgroupClusteredAnd(data[2].i4.xyz, 1);
data[invocation].i4 = subgroupClusteredAnd(data[3].i4, 1);
data[invocation].u4.x = subgroupClusteredAnd(data[0].u4.x, 1);
data[invocation].u4.xy = subgroupClusteredAnd(data[1].u4.xy, 1);
data[invocation].u4.xyz = subgroupClusteredAnd(data[2].u4.xyz, 1);
data[invocation].u4 = subgroupClusteredAnd(data[3].u4, 1);
data[invocation].i4.x = int(subgroupClusteredAnd(data[0].i4.x < 0, 1));
data[invocation].i4.xy = ivec2(subgroupClusteredAnd(lessThan(data[1].i4.xy, ivec2(0)), 1));
data[invocation].i4.xyz = ivec3(subgroupClusteredAnd(lessThan(data[1].i4.xyz, ivec3(0)), 1));
data[invocation].i4 = ivec4(subgroupClusteredAnd(lessThan(data[1].i4, ivec4(0)), 1));
// subgroupClusteredOr: int, uint, then boolean operands.
data[invocation].i4.x = subgroupClusteredOr(data[0].i4.x, 1);
data[invocation].i4.xy = subgroupClusteredOr(data[1].i4.xy, 1);
data[invocation].i4.xyz = subgroupClusteredOr(data[2].i4.xyz, 1);
data[invocation].i4 = subgroupClusteredOr(data[3].i4, 1);
data[invocation].u4.x = subgroupClusteredOr(data[0].u4.x, 1);
data[invocation].u4.xy = subgroupClusteredOr(data[1].u4.xy, 1);
data[invocation].u4.xyz = subgroupClusteredOr(data[2].u4.xyz, 1);
data[invocation].u4 = subgroupClusteredOr(data[3].u4, 1);
data[invocation].i4.x = int(subgroupClusteredOr(data[0].i4.x < 0, 1));
data[invocation].i4.xy = ivec2(subgroupClusteredOr(lessThan(data[1].i4.xy, ivec2(0)), 1));
data[invocation].i4.xyz = ivec3(subgroupClusteredOr(lessThan(data[1].i4.xyz, ivec3(0)), 1));
data[invocation].i4 = ivec4(subgroupClusteredOr(lessThan(data[1].i4, ivec4(0)), 1));
// subgroupClusteredXor: int, uint, then boolean operands.
data[invocation].i4.x = subgroupClusteredXor(data[0].i4.x, 1);
data[invocation].i4.xy = subgroupClusteredXor(data[1].i4.xy, 1);
data[invocation].i4.xyz = subgroupClusteredXor(data[2].i4.xyz, 1);
data[invocation].i4 = subgroupClusteredXor(data[3].i4, 1);
data[invocation].u4.x = subgroupClusteredXor(data[0].u4.x, 1);
data[invocation].u4.xy = subgroupClusteredXor(data[1].u4.xy, 1);
data[invocation].u4.xyz = subgroupClusteredXor(data[2].u4.xyz, 1);
data[invocation].u4 = subgroupClusteredXor(data[3].u4, 1);
data[invocation].i4.x = int(subgroupClusteredXor(data[0].i4.x < 0, 1));
data[invocation].i4.xy = ivec2(subgroupClusteredXor(lessThan(data[1].i4.xy, ivec2(0)), 1));
data[invocation].i4.xyz = ivec3(subgroupClusteredXor(lessThan(data[1].i4.xyz, ivec3(0)), 1));
data[invocation].i4 = ivec4(subgroupClusteredXor(lessThan(data[1].i4, ivec4(0)), 1));
}
#version 450
// Compute shader that enables GL_KHR_shader_subgroup_quad and calls
// subgroupQuadBroadcast and the three subgroupQuadSwap* built-ins on float,
// int, uint, double and bool operands of one to four components.
#extension GL_KHR_shader_subgroup_quad: enable
layout (local_size_x = 8) in;
// Array of four buffer blocks; each block holds one 4-component vector per
// base type used below.
layout(binding = 0) buffer Buffers
{
vec4 f4;
ivec4 i4;
uvec4 u4;
dvec4 d4;
} data[4];
void main()
{
// The modulo keeps the index in [0, 3], i.e. inside data[4].
uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
// subgroupQuadBroadcast: the second argument is the literal 1 in every call.
// data[0]..data[3] supply the 1-, 2-, 3- and 4-component numeric cases.
data[invocation].f4.x = subgroupQuadBroadcast(data[0].f4.x, 1);
data[invocation].f4.xy = subgroupQuadBroadcast(data[1].f4.xy, 1);
data[invocation].f4.xyz = subgroupQuadBroadcast(data[2].f4.xyz, 1);
data[invocation].f4 = subgroupQuadBroadcast(data[3].f4, 1);
data[invocation].i4.x = subgroupQuadBroadcast(data[0].i4.x, 1);
data[invocation].i4.xy = subgroupQuadBroadcast(data[1].i4.xy, 1);
data[invocation].i4.xyz = subgroupQuadBroadcast(data[2].i4.xyz, 1);
data[invocation].i4 = subgroupQuadBroadcast(data[3].i4, 1);
data[invocation].u4.x = subgroupQuadBroadcast(data[0].u4.x, 1);
data[invocation].u4.xy = subgroupQuadBroadcast(data[1].u4.xy, 1);
data[invocation].u4.xyz = subgroupQuadBroadcast(data[2].u4.xyz, 1);
data[invocation].u4 = subgroupQuadBroadcast(data[3].u4, 1);
data[invocation].d4.x = subgroupQuadBroadcast(data[0].d4.x, 1);
data[invocation].d4.xy = subgroupQuadBroadcast(data[1].d4.xy, 1);
data[invocation].d4.xyz = subgroupQuadBroadcast(data[2].d4.xyz, 1);
data[invocation].d4 = subgroupQuadBroadcast(data[3].d4, 1);
// Boolean operands built from "< 0" comparisons, converted to int types for
// storage. The vector cases all read data[1].
data[invocation].i4.x = int(subgroupQuadBroadcast(data[0].i4.x < 0, 1));
data[invocation].i4.xy = ivec2(subgroupQuadBroadcast(lessThan(data[1].i4.xy, ivec2(0)), 1));
data[invocation].i4.xyz = ivec3(subgroupQuadBroadcast(lessThan(data[1].i4.xyz, ivec3(0)), 1));
data[invocation].i4 = ivec4(subgroupQuadBroadcast(lessThan(data[1].i4, ivec4(0)), 1));
// subgroupQuadSwapHorizontal: same operand matrix, single-argument calls.
data[invocation].f4.x = subgroupQuadSwapHorizontal(data[0].f4.x);
data[invocation].f4.xy = subgroupQuadSwapHorizontal(data[1].f4.xy);
data[invocation].f4.xyz = subgroupQuadSwapHorizontal(data[2].f4.xyz);
data[invocation].f4 = subgroupQuadSwapHorizontal(data[3].f4);
data[invocation].i4.x = subgroupQuadSwapHorizontal(data[0].i4.x);
data[invocation].i4.xy = subgroupQuadSwapHorizontal(data[1].i4.xy);
data[invocation].i4.xyz = subgroupQuadSwapHorizontal(data[2].i4.xyz);
data[invocation].i4 = subgroupQuadSwapHorizontal(data[3].i4);
data[invocation].u4.x = subgroupQuadSwapHorizontal(data[0].u4.x);
data[invocation].u4.xy = subgroupQuadSwapHorizontal(data[1].u4.xy);
data[invocation].u4.xyz = subgroupQuadSwapHorizontal(data[2].u4.xyz);
data[invocation].u4 = subgroupQuadSwapHorizontal(data[3].u4);
data[invocation].d4.x = subgroupQuadSwapHorizontal(data[0].d4.x);
data[invocation].d4.xy = subgroupQuadSwapHorizontal(data[1].d4.xy);
data[invocation].d4.xyz = subgroupQuadSwapHorizontal(data[2].d4.xyz);
data[invocation].d4 = subgroupQuadSwapHorizontal(data[3].d4);
data[invocation].i4.x = int(subgroupQuadSwapHorizontal(data[0].i4.x < 0));
data[invocation].i4.xy = ivec2(subgroupQuadSwapHorizontal(lessThan(data[1].i4.xy, ivec2(0))));
data[invocation].i4.xyz = ivec3(subgroupQuadSwapHorizontal(lessThan(data[1].i4.xyz, ivec3(0))));
data[invocation].i4 = ivec4(subgroupQuadSwapHorizontal(lessThan(data[1].i4, ivec4(0))));
// subgroupQuadSwapVertical: same operand matrix.
data[invocation].f4.x = subgroupQuadSwapVertical(data[0].f4.x);
data[invocation].f4.xy = subgroupQuadSwapVertical(data[1].f4.xy);
data[invocation].f4.xyz = subgroupQuadSwapVertical(data[2].f4.xyz);
data[invocation].f4 = subgroupQuadSwapVertical(data[3].f4);
data[invocation].i4.x = subgroupQuadSwapVertical(data[0].i4.x);
data[invocation].i4.xy = subgroupQuadSwapVertical(data[1].i4.xy);
data[invocation].i4.xyz = subgroupQuadSwapVertical(data[2].i4.xyz);
data[invocation].i4 = subgroupQuadSwapVertical(data[3].i4);
data[invocation].u4.x = subgroupQuadSwapVertical(data[0].u4.x);
data[invocation].u4.xy = subgroupQuadSwapVertical(data[1].u4.xy);
data[invocation].u4.xyz = subgroupQuadSwapVertical(data[2].u4.xyz);
data[invocation].u4 = subgroupQuadSwapVertical(data[3].u4);
data[invocation].d4.x = subgroupQuadSwapVertical(data[0].d4.x);
data[invocation].d4.xy = subgroupQuadSwapVertical(data[1].d4.xy);
data[invocation].d4.xyz = subgroupQuadSwapVertical(data[2].d4.xyz);
data[invocation].d4 = subgroupQuadSwapVertical(data[3].d4);
data[invocation].i4.x = int(subgroupQuadSwapVertical(data[0].i4.x < 0));
data[invocation].i4.xy = ivec2(subgroupQuadSwapVertical(lessThan(data[1].i4.xy, ivec2(0))));
data[invocation].i4.xyz = ivec3(subgroupQuadSwapVertical(lessThan(data[1].i4.xyz, ivec3(0))));
data[invocation].i4 = ivec4(subgroupQuadSwapVertical(lessThan(data[1].i4, ivec4(0))));
// subgroupQuadSwapDiagonal: same operand matrix.
data[invocation].f4.x = subgroupQuadSwapDiagonal(data[0].f4.x);
data[invocation].f4.xy = subgroupQuadSwapDiagonal(data[1].f4.xy);
data[invocation].f4.xyz = subgroupQuadSwapDiagonal(data[2].f4.xyz);
data[invocation].f4 = subgroupQuadSwapDiagonal(data[3].f4);
data[invocation].i4.x = subgroupQuadSwapDiagonal(data[0].i4.x);
data[invocation].i4.xy = subgroupQuadSwapDiagonal(data[1].i4.xy);
data[invocation].i4.xyz = subgroupQuadSwapDiagonal(data[2].i4.xyz);
data[invocation].i4 = subgroupQuadSwapDiagonal(data[3].i4);
data[invocation].u4.x = subgroupQuadSwapDiagonal(data[0].u4.x);
data[invocation].u4.xy = subgroupQuadSwapDiagonal(data[1].u4.xy);
data[invocation].u4.xyz = subgroupQuadSwapDiagonal(data[2].u4.xyz);
data[invocation].u4 = subgroupQuadSwapDiagonal(data[3].u4);
data[invocation].d4.x = subgroupQuadSwapDiagonal(data[0].d4.x);
data[invocation].d4.xy = subgroupQuadSwapDiagonal(data[1].d4.xy);
data[invocation].d4.xyz = subgroupQuadSwapDiagonal(data[2].d4.xyz);
data[invocation].d4 = subgroupQuadSwapDiagonal(data[3].d4);
data[invocation].i4.x = int(subgroupQuadSwapDiagonal(data[0].i4.x < 0));
data[invocation].i4.xy = ivec2(subgroupQuadSwapDiagonal(lessThan(data[1].i4.xy, ivec2(0))));
data[invocation].i4.xyz = ivec3(subgroupQuadSwapDiagonal(lessThan(data[1].i4.xyz, ivec3(0))));
data[invocation].i4 = ivec4(subgroupQuadSwapDiagonal(lessThan(data[1].i4, ivec4(0))));
}
#version 450
// Compute shader that enables GL_KHR_shader_subgroup_shuffle and calls
// subgroupShuffle and subgroupShuffleXor on float, int, uint, double and bool
// operands of one to four components.
#extension GL_KHR_shader_subgroup_shuffle: enable
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// Array of four buffer blocks; each block holds one 4-component vector per
// base type used below.
layout(binding = 0) buffer Buffers
{
vec4 f4;
ivec4 i4;
uvec4 u4;
dvec4 d4;
} data[4];
void main()
{
// The modulo keeps the value in [0, 3]; it is used both as the index into
// data[4] and as the second argument of every shuffle call.
uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
// subgroupShuffle: data[0]..data[3] supply the 1-, 2-, 3- and 4-component
// numeric cases.
data[invocation].f4.x = subgroupShuffle(data[0].f4.x, invocation);
data[invocation].f4.xy = subgroupShuffle(data[1].f4.xy, invocation);
data[invocation].f4.xyz = subgroupShuffle(data[2].f4.xyz, invocation);
data[invocation].f4 = subgroupShuffle(data[3].f4, invocation);
data[invocation].i4.x = subgroupShuffle(data[0].i4.x, invocation);
data[invocation].i4.xy = subgroupShuffle(data[1].i4.xy, invocation);
data[invocation].i4.xyz = subgroupShuffle(data[2].i4.xyz, invocation);
data[invocation].i4 = subgroupShuffle(data[3].i4, invocation);
data[invocation].u4.x = subgroupShuffle(data[0].u4.x, invocation);
data[invocation].u4.xy = subgroupShuffle(data[1].u4.xy, invocation);
data[invocation].u4.xyz = subgroupShuffle(data[2].u4.xyz, invocation);
data[invocation].u4 = subgroupShuffle(data[3].u4, invocation);
data[invocation].d4.x = subgroupShuffle(data[0].d4.x, invocation);
data[invocation].d4.xy = subgroupShuffle(data[1].d4.xy, invocation);
data[invocation].d4.xyz = subgroupShuffle(data[2].d4.xyz, invocation);
data[invocation].d4 = subgroupShuffle(data[3].d4, invocation);
// Boolean operands built from "< 0" comparisons, converted to int types for
// storage. The vector cases all read data[1].
data[invocation].i4.x = int(subgroupShuffle(data[0].i4.x < 0, invocation));
data[invocation].i4.xy = ivec2(subgroupShuffle(lessThan(data[1].i4.xy, ivec2(0)), invocation));
data[invocation].i4.xyz = ivec3(subgroupShuffle(lessThan(data[1].i4.xyz, ivec3(0)), invocation));
data[invocation].i4 = ivec4(subgroupShuffle(lessThan(data[1].i4, ivec4(0)), invocation));
// subgroupShuffleXor: same operand matrix.
data[invocation].f4.x = subgroupShuffleXor(data[0].f4.x, invocation);
data[invocation].f4.xy = subgroupShuffleXor(data[1].f4.xy, invocation);
data[invocation].f4.xyz = subgroupShuffleXor(data[2].f4.xyz, invocation);
data[invocation].f4 = subgroupShuffleXor(data[3].f4, invocation);
data[invocation].i4.x = subgroupShuffleXor(data[0].i4.x, invocation);
data[invocation].i4.xy = subgroupShuffleXor(data[1].i4.xy, invocation);
data[invocation].i4.xyz = subgroupShuffleXor(data[2].i4.xyz, invocation);
data[invocation].i4 = subgroupShuffleXor(data[3].i4, invocation);
data[invocation].u4.x = subgroupShuffleXor(data[0].u4.x, invocation);
data[invocation].u4.xy = subgroupShuffleXor(data[1].u4.xy, invocation);
data[invocation].u4.xyz = subgroupShuffleXor(data[2].u4.xyz, invocation);
data[invocation].u4 = subgroupShuffleXor(data[3].u4, invocation);
data[invocation].d4.x = subgroupShuffleXor(data[0].d4.x, invocation);
data[invocation].d4.xy = subgroupShuffleXor(data[1].d4.xy, invocation);
data[invocation].d4.xyz = subgroupShuffleXor(data[2].d4.xyz, invocation);
data[invocation].d4 = subgroupShuffleXor(data[3].d4, invocation);
data[invocation].i4.x = int(subgroupShuffleXor(data[0].i4.x < 0, invocation));
data[invocation].i4.xy = ivec2(subgroupShuffleXor(lessThan(data[1].i4.xy, ivec2(0)), invocation));
data[invocation].i4.xyz = ivec3(subgroupShuffleXor(lessThan(data[1].i4.xyz, ivec3(0)), invocation));
data[invocation].i4 = ivec4(subgroupShuffleXor(lessThan(data[1].i4, ivec4(0)), invocation));
}
#version 450
// Compute shader that enables GL_KHR_shader_subgroup_shuffle_relative and
// calls subgroupShuffleUp and subgroupShuffleDown on float, int, uint, double
// and bool operands of one to four components.
#extension GL_KHR_shader_subgroup_shuffle_relative: enable
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// Array of four buffer blocks; each block holds one 4-component vector per
// base type used below.
layout(binding = 0) buffer Buffers
{
vec4 f4;
ivec4 i4;
uvec4 u4;
dvec4 d4;
} data[4];
void main()
{
// The modulo keeps the value in [0, 3]; it is used both as the index into
// data[4] and as the second argument of every shuffle call.
uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
// subgroupShuffleUp: data[0]..data[3] supply the 1-, 2-, 3- and 4-component
// numeric cases.
data[invocation].f4.x = subgroupShuffleUp(data[0].f4.x, invocation);
data[invocation].f4.xy = subgroupShuffleUp(data[1].f4.xy, invocation);
data[invocation].f4.xyz = subgroupShuffleUp(data[2].f4.xyz, invocation);
data[invocation].f4 = subgroupShuffleUp(data[3].f4, invocation);
data[invocation].i4.x = subgroupShuffleUp(data[0].i4.x, invocation);
data[invocation].i4.xy = subgroupShuffleUp(data[1].i4.xy, invocation);
data[invocation].i4.xyz = subgroupShuffleUp(data[2].i4.xyz, invocation);
data[invocation].i4 = subgroupShuffleUp(data[3].i4, invocation);
data[invocation].u4.x = subgroupShuffleUp(data[0].u4.x, invocation);
data[invocation].u4.xy = subgroupShuffleUp(data[1].u4.xy, invocation);
data[invocation].u4.xyz = subgroupShuffleUp(data[2].u4.xyz, invocation);
data[invocation].u4 = subgroupShuffleUp(data[3].u4, invocation);
data[invocation].d4.x = subgroupShuffleUp(data[0].d4.x, invocation);
data[invocation].d4.xy = subgroupShuffleUp(data[1].d4.xy, invocation);
data[invocation].d4.xyz = subgroupShuffleUp(data[2].d4.xyz, invocation);
data[invocation].d4 = subgroupShuffleUp(data[3].d4, invocation);
// Boolean operands built from "< 0" comparisons, converted to int types for
// storage. The vector cases all read data[1].
data[invocation].i4.x = int(subgroupShuffleUp(data[0].i4.x < 0, invocation));
data[invocation].i4.xy = ivec2(subgroupShuffleUp(lessThan(data[1].i4.xy, ivec2(0)), invocation));
data[invocation].i4.xyz = ivec3(subgroupShuffleUp(lessThan(data[1].i4.xyz, ivec3(0)), invocation));
data[invocation].i4 = ivec4(subgroupShuffleUp(lessThan(data[1].i4, ivec4(0)), invocation));
// subgroupShuffleDown: same operand matrix.
data[invocation].f4.x = subgroupShuffleDown(data[0].f4.x, invocation);
data[invocation].f4.xy = subgroupShuffleDown(data[1].f4.xy, invocation);
data[invocation].f4.xyz = subgroupShuffleDown(data[2].f4.xyz, invocation);
data[invocation].f4 = subgroupShuffleDown(data[3].f4, invocation);
data[invocation].i4.x = subgroupShuffleDown(data[0].i4.x, invocation);
data[invocation].i4.xy = subgroupShuffleDown(data[1].i4.xy, invocation);
data[invocation].i4.xyz = subgroupShuffleDown(data[2].i4.xyz, invocation);
data[invocation].i4 = subgroupShuffleDown(data[3].i4, invocation);
data[invocation].u4.x = subgroupShuffleDown(data[0].u4.x, invocation);
data[invocation].u4.xy = subgroupShuffleDown(data[1].u4.xy, invocation);
data[invocation].u4.xyz = subgroupShuffleDown(data[2].u4.xyz, invocation);
data[invocation].u4 = subgroupShuffleDown(data[3].u4, invocation);
data[invocation].d4.x = subgroupShuffleDown(data[0].d4.x, invocation);
data[invocation].d4.xy = subgroupShuffleDown(data[1].d4.xy, invocation);
data[invocation].d4.xyz = subgroupShuffleDown(data[2].d4.xyz, invocation);
data[invocation].d4 = subgroupShuffleDown(data[3].d4, invocation);
data[invocation].i4.x = int(subgroupShuffleDown(data[0].i4.x < 0, invocation));
data[invocation].i4.xy = ivec2(subgroupShuffleDown(lessThan(data[1].i4.xy, ivec2(0)), invocation));
data[invocation].i4.xyz = ivec3(subgroupShuffleDown(lessThan(data[1].i4.xyz, ivec3(0)), invocation));
data[invocation].i4 = ivec4(subgroupShuffleDown(lessThan(data[1].i4, ivec4(0)), invocation));
}
#version 450
// Compute shader that enables GL_KHR_shader_subgroup_vote and calls
// subgroupAll, subgroupAny and subgroupAllEqual; subgroupAllEqual is called
// with float, int, uint, double and bool operands of one to four components.
#extension GL_KHR_shader_subgroup_vote: enable
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// Array of four buffer blocks; besides the four vectors, each block carries
// an int `r` that is both the vote predicate input and the result slot.
layout(binding = 0) buffer Buffers
{
vec4 f4;
ivec4 i4;
uvec4 u4;
dvec4 d4;
int r;
} data[4];
void main()
{
// The modulo keeps the index in [0, 3], i.e. inside data[4].
uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
if (subgroupAll(data[invocation].r < 0))
{
// subgroupAllEqual with float, int and uint operands; data[0]..data[3]
// supply the 1-, 2-, 3- and 4-component cases. Each result overwrites r.
data[invocation].r = int(subgroupAllEqual(data[0].f4.x));
data[invocation].r = int(subgroupAllEqual(data[1].f4.xy));
data[invocation].r = int(subgroupAllEqual(data[2].f4.xyz));
data[invocation].r = int(subgroupAllEqual(data[3].f4));
data[invocation].r = int(subgroupAllEqual(data[0].i4.x));
data[invocation].r = int(subgroupAllEqual(data[1].i4.xy));
data[invocation].r = int(subgroupAllEqual(data[2].i4.xyz));
data[invocation].r = int(subgroupAllEqual(data[3].i4));
data[invocation].r = int(subgroupAllEqual(data[0].u4.x));
data[invocation].r = int(subgroupAllEqual(data[1].u4.xy));
data[invocation].r = int(subgroupAllEqual(data[2].u4.xyz));
data[invocation].r = int(subgroupAllEqual(data[3].u4));
}
else if (subgroupAny(data[invocation].r < 0))
{
// subgroupAllEqual with double operands.
data[invocation].r = int(subgroupAllEqual(data[0].d4.x));
data[invocation].r = int(subgroupAllEqual(data[1].d4.xy));
data[invocation].r = int(subgroupAllEqual(data[2].d4.xyz));
data[invocation].r = int(subgroupAllEqual(data[3].d4));
// Boolean operands built from "< 0" comparisons (the vector cases all read
// data[1]). Each result is passed through an int/ivecN constructor and then
// through int(...) before being stored in r.
data[invocation].r = int(int(subgroupAllEqual(data[0].i4.x < 0)));
data[invocation].r = int(ivec2(subgroupAllEqual(lessThan(data[1].i4.xy, ivec2(0)))));
data[invocation].r = int(ivec3(subgroupAllEqual(lessThan(data[1].i4.xyz, ivec3(0)))));
data[invocation].r = int(ivec4(subgroupAllEqual(lessThan(data[1].i4, ivec4(0)))));
}
}
......@@ -137,6 +137,8 @@ enum TBuiltInVariable {
EbvLocalInvocationId,
EbvGlobalInvocationId,
EbvLocalInvocationIndex,
EbvNumSubgroups,
EbvSubgroupID,
EbvSubGroupSize,
EbvSubGroupInvocation,
EbvSubGroupEqMask,
......@@ -144,6 +146,13 @@ enum TBuiltInVariable {
EbvSubGroupGtMask,
EbvSubGroupLeMask,
EbvSubGroupLtMask,
EbvSubgroupSize2,
EbvSubgroupInvocation2,
EbvSubgroupEqMask2,
EbvSubgroupGeMask2,
EbvSubgroupGtMask2,
EbvSubgroupLeMask2,
EbvSubgroupLtMask2,
EbvVertexId,
EbvInstanceId,
EbvVertexIndex,
......
......@@ -329,6 +329,62 @@ enum TOperator {
EOpAllInvocations,
EOpAllInvocationsEqual,
EOpSubgroupBarrier,
EOpSubgroupMemoryBarrier,
EOpSubgroupMemoryBarrierBuffer,
EOpSubgroupMemoryBarrierImage,
EOpSubgroupMemoryBarrierShared, // compute only
EOpSubgroupElect,
EOpSubgroupAll,
EOpSubgroupAny,
EOpSubgroupAllEqual,
EOpSubgroupBroadcast,
EOpSubgroupBroadcastFirst,
EOpSubgroupBallot,
EOpSubgroupInverseBallot,
EOpSubgroupBallotBitExtract,
EOpSubgroupBallotBitCount,
EOpSubgroupBallotInclusiveBitCount,
EOpSubgroupBallotExclusiveBitCount,
EOpSubgroupBallotFindLSB,
EOpSubgroupBallotFindMSB,
EOpSubgroupShuffle,
EOpSubgroupShuffleXor,
EOpSubgroupShuffleUp,
EOpSubgroupShuffleDown,
EOpSubgroupAdd,
EOpSubgroupMul,
EOpSubgroupMin,
EOpSubgroupMax,
EOpSubgroupAnd,
EOpSubgroupOr,
EOpSubgroupXor,
EOpSubgroupInclusiveAdd,
EOpSubgroupInclusiveMul,
EOpSubgroupInclusiveMin,
EOpSubgroupInclusiveMax,
EOpSubgroupInclusiveAnd,
EOpSubgroupInclusiveOr,
EOpSubgroupInclusiveXor,
EOpSubgroupExclusiveAdd,
EOpSubgroupExclusiveMul,
EOpSubgroupExclusiveMin,
EOpSubgroupExclusiveMax,
EOpSubgroupExclusiveAnd,
EOpSubgroupExclusiveOr,
EOpSubgroupExclusiveXor,
EOpSubgroupClusteredAdd,
EOpSubgroupClusteredMul,
EOpSubgroupClusteredMin,
EOpSubgroupClusteredMax,
EOpSubgroupClusteredAnd,
EOpSubgroupClusteredOr,
EOpSubgroupClusteredXor,
EOpSubgroupQuadBroadcast,
EOpSubgroupQuadSwapHorizontal,
EOpSubgroupQuadSwapVertical,
EOpSubgroupQuadSwapDiagonal,
#ifdef AMD_EXTENSIONS
EOpMinInvocations,
EOpMaxInvocations,
......
......@@ -180,6 +180,14 @@ void TParseVersions::initializeExtensionBehavior()
extensionBehavior[E_GL_ARB_sparse_texture2] = EBhDisable;
extensionBehavior[E_GL_ARB_sparse_texture_clamp] = EBhDisable;
// extensionBehavior[E_GL_ARB_cull_distance] = EBhDisable; // present for 4.5, but need extension control over block members
extensionBehavior[E_GL_KHR_shader_subgroup_basic] = EBhDisable;
extensionBehavior[E_GL_KHR_shader_subgroup_vote] = EBhDisable;
extensionBehavior[E_GL_KHR_shader_subgroup_arithmetic] = EBhDisable;
extensionBehavior[E_GL_KHR_shader_subgroup_ballot] = EBhDisable;
extensionBehavior[E_GL_KHR_shader_subgroup_shuffle] = EBhDisable;
extensionBehavior[E_GL_KHR_shader_subgroup_shuffle_relative] = EBhDisable;
extensionBehavior[E_GL_KHR_shader_subgroup_clustered] = EBhDisable;
extensionBehavior[E_GL_KHR_shader_subgroup_quad] = EBhDisable;
extensionBehavior[E_GL_EXT_shader_non_constant_global_initializers] = EBhDisable;
extensionBehavior[E_GL_EXT_shader_image_load_formatted] = EBhDisable;
......@@ -310,6 +318,16 @@ void TParseVersions::getPreamble(std::string& preamble)
"#define GL_EXT_shader_non_constant_global_initializers 1\n"
"#define GL_EXT_shader_image_load_formatted 1\n"
// GL_KHR_shader_subgroup
"#define GL_KHR_shader_subgroup_basic 1\n"
"#define GL_KHR_shader_subgroup_vote 1\n"
"#define GL_KHR_shader_subgroup_arithmetic 1\n"
"#define GL_KHR_shader_subgroup_ballot 1\n"
"#define GL_KHR_shader_subgroup_shuffle 1\n"
"#define GL_KHR_shader_subgroup_shuffle_relative 1\n"
"#define GL_KHR_shader_subgroup_clustered 1\n"
"#define GL_KHR_shader_subgroup_quad 1\n"
#ifdef AMD_EXTENSIONS
"#define GL_AMD_shader_ballot 1\n"
"#define GL_AMD_shader_trinary_minmax 1\n"
......@@ -645,6 +663,21 @@ void TParseVersions::updateExtensionBehavior(int line, const char* extension, co
updateExtensionBehavior(line, "GL_OES_shader_io_blocks", behaviorString);
else if (strcmp(extension, "GL_GOOGLE_include_directive") == 0)
updateExtensionBehavior(line, "GL_GOOGLE_cpp_style_line_directive", behaviorString);
// subgroup_* to subgroup_basic
else if (strcmp(extension, "GL_KHR_shader_subgroup_vote") == 0)
updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
else if (strcmp(extension, "GL_KHR_shader_subgroup_arithmetic") == 0)
updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
else if (strcmp(extension, "GL_KHR_shader_subgroup_ballot") == 0)
updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
else if (strcmp(extension, "GL_KHR_shader_subgroup_shuffle") == 0)
updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
else if (strcmp(extension, "GL_KHR_shader_subgroup_shuffle_relative") == 0)
updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
else if (strcmp(extension, "GL_KHR_shader_subgroup_clustered") == 0)
updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
else if (strcmp(extension, "GL_KHR_shader_subgroup_quad") == 0)
updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
}
void TParseVersions::updateExtensionBehavior(const char* extension, TExtensionBehavior behavior)
......
......@@ -128,6 +128,14 @@ const char* const E_GL_ARB_shader_ballot = "GL_ARB_shader_ballot"
const char* const E_GL_ARB_sparse_texture2 = "GL_ARB_sparse_texture2";
const char* const E_GL_ARB_sparse_texture_clamp = "GL_ARB_sparse_texture_clamp";
// const char* const E_GL_ARB_cull_distance = "GL_ARB_cull_distance"; // present for 4.5, but need extension control over block members
// GL_KHR_shader_subgroup: name strings for its eight sub-extensions
// (basic, vote, arithmetic, ballot, shuffle, shuffle_relative, clustered, quad).
// Each constant holds exactly the extension name spelled in its identifier.
const char* const E_GL_KHR_shader_subgroup_basic = "GL_KHR_shader_subgroup_basic";
const char* const E_GL_KHR_shader_subgroup_vote = "GL_KHR_shader_subgroup_vote";
const char* const E_GL_KHR_shader_subgroup_arithmetic = "GL_KHR_shader_subgroup_arithmetic";
const char* const E_GL_KHR_shader_subgroup_ballot = "GL_KHR_shader_subgroup_ballot";
const char* const E_GL_KHR_shader_subgroup_shuffle = "GL_KHR_shader_subgroup_shuffle";
const char* const E_GL_KHR_shader_subgroup_shuffle_relative = "GL_KHR_shader_subgroup_shuffle_relative";
const char* const E_GL_KHR_shader_subgroup_clustered = "GL_KHR_shader_subgroup_clustered";
const char* const E_GL_KHR_shader_subgroup_quad = "GL_KHR_shader_subgroup_quad";
const char* const E_GL_EXT_shader_non_constant_global_initializers = "GL_EXT_shader_non_constant_global_initializers";
const char* const E_GL_EXT_shader_image_load_formatted = "GL_EXT_shader_image_load_formatted";
......
......@@ -384,6 +384,57 @@ bool TOutputTraverser::visitUnary(TVisit /* visit */, TIntermUnary* node)
case EOpAllInvocations: out.debug << "allInvocations"; break;
case EOpAllInvocationsEqual: out.debug << "allInvocationsEqual"; break;
case EOpSubgroupElect: out.debug << "subgroupElect"; break;
case EOpSubgroupAll: out.debug << "subgroupAll"; break;
case EOpSubgroupAny: out.debug << "subgroupAny"; break;
case EOpSubgroupAllEqual: out.debug << "subgroupAllEqual"; break;
case EOpSubgroupBroadcast: out.debug << "subgroupBroadcast"; break;
case EOpSubgroupBroadcastFirst: out.debug << "subgroupBroadcastFirst"; break;
case EOpSubgroupBallot: out.debug << "subgroupBallot"; break;
case EOpSubgroupInverseBallot: out.debug << "subgroupInverseBallot"; break;
case EOpSubgroupBallotBitExtract: out.debug << "subgroupBallotBitExtract"; break;
case EOpSubgroupBallotBitCount: out.debug << "subgroupBallotBitCount"; break;
case EOpSubgroupBallotInclusiveBitCount: out.debug << "subgroupBallotInclusiveBitCount"; break;
case EOpSubgroupBallotExclusiveBitCount: out.debug << "subgroupBallotExclusiveBitCount"; break;
case EOpSubgroupBallotFindLSB: out.debug << "subgroupBallotFindLSB"; break;
case EOpSubgroupBallotFindMSB: out.debug << "subgroupBallotFindMSB"; break;
case EOpSubgroupShuffle: out.debug << "subgroupShuffle"; break;
case EOpSubgroupShuffleXor: out.debug << "subgroupShuffleXor"; break;
case EOpSubgroupShuffleUp: out.debug << "subgroupShuffleUp"; break;
case EOpSubgroupShuffleDown: out.debug << "subgroupShuffleDown"; break;
case EOpSubgroupAdd: out.debug << "subgroupAdd"; break;
case EOpSubgroupMul: out.debug << "subgroupMul"; break;
case EOpSubgroupMin: out.debug << "subgroupMin"; break;
case EOpSubgroupMax: out.debug << "subgroupMax"; break;
case EOpSubgroupAnd: out.debug << "subgroupAnd"; break;
case EOpSubgroupOr: out.debug << "subgroupOr"; break;
case EOpSubgroupXor: out.debug << "subgroupXor"; break;
case EOpSubgroupInclusiveAdd: out.debug << "subgroupInclusiveAdd"; break;
case EOpSubgroupInclusiveMul: out.debug << "subgroupInclusiveMul"; break;
case EOpSubgroupInclusiveMin: out.debug << "subgroupInclusiveMin"; break;
case EOpSubgroupInclusiveMax: out.debug << "subgroupInclusiveMax"; break;
case EOpSubgroupInclusiveAnd: out.debug << "subgroupInclusiveAnd"; break;
case EOpSubgroupInclusiveOr: out.debug << "subgroupInclusiveOr"; break;
case EOpSubgroupInclusiveXor: out.debug << "subgroupInclusiveXor"; break;
case EOpSubgroupExclusiveAdd: out.debug << "subgroupExclusiveAdd"; break;
case EOpSubgroupExclusiveMul: out.debug << "subgroupExclusiveMul"; break;
case EOpSubgroupExclusiveMin: out.debug << "subgroupExclusiveMin"; break;
case EOpSubgroupExclusiveMax: out.debug << "subgroupExclusiveMax"; break;
case EOpSubgroupExclusiveAnd: out.debug << "subgroupExclusiveAnd"; break;
case EOpSubgroupExclusiveOr: out.debug << "subgroupExclusiveOr"; break;
case EOpSubgroupExclusiveXor: out.debug << "subgroupExclusiveXor"; break;
case EOpSubgroupClusteredAdd: out.debug << "subgroupClusteredAdd"; break;
case EOpSubgroupClusteredMul: out.debug << "subgroupClusteredMul"; break;
case EOpSubgroupClusteredMin: out.debug << "subgroupClusteredMin"; break;
case EOpSubgroupClusteredMax: out.debug << "subgroupClusteredMax"; break;
case EOpSubgroupClusteredAnd: out.debug << "subgroupClusteredAnd"; break;
case EOpSubgroupClusteredOr: out.debug << "subgroupClusteredOr"; break;
case EOpSubgroupClusteredXor: out.debug << "subgroupClusteredXor"; break;
case EOpSubgroupQuadBroadcast: out.debug << "subgroupQuadBroadcast"; break;
case EOpSubgroupQuadSwapHorizontal: out.debug << "subgroupQuadSwapHorizontal"; break;
case EOpSubgroupQuadSwapVertical: out.debug << "subgroupQuadSwapVertical"; break;
case EOpSubgroupQuadSwapDiagonal: out.debug << "subgroupQuadSwapDiagonal"; break;
case EOpClip: out.debug << "clip"; break;
case EOpIsFinite: out.debug << "isfinite"; break;
case EOpLog10: out.debug << "log10"; break;
......@@ -646,6 +697,12 @@ bool TOutputTraverser::visitAggregate(TVisit /* visit */, TIntermAggregate* node
case EOpWorkgroupMemoryBarrier: out.debug << "WorkgroupMemoryBarrier"; break;
case EOpWorkgroupMemoryBarrierWithGroupSync: out.debug << "WorkgroupMemoryBarrierWithGroupSync"; break;
case EOpSubgroupBarrier: out.debug << "subgroupBarrier"; break;
case EOpSubgroupMemoryBarrier: out.debug << "subgroupMemoryBarrier"; break;
case EOpSubgroupMemoryBarrierBuffer: out.debug << "subgroupMemoryBarrierBuffer"; break;
case EOpSubgroupMemoryBarrierImage: out.debug << "subgroupMemoryBarrierImage"; break;
case EOpSubgroupMemoryBarrierShared: out.debug << "subgroupMemoryBarrierShared"; break;
default: out.debug.message(EPrefixError, "Bad aggregation op");
}
......
......@@ -306,6 +306,14 @@ INSTANTIATE_TEST_CASE_P(
"spv.storageBuffer.vert",
"spv.precise.tese",
"spv.precise.tesc",
"spv.subgroupBasic.comp",
"spv.subgroupVote.comp",
"spv.subgroupBallot.comp",
"spv.subgroupShuffle.comp",
"spv.subgroupShuffleRelative.comp",
"spv.subgroupArithmetic.comp",
"spv.subgroupClustered.comp",
"spv.subgroupQuad.comp",
})),
FileNameAsCustomTestSuffix
);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment