Commit ffdd61c3 by John Kessenich

Merge branch 'support_latest_sm_60_ops'

n why this merge is necessary,
parents d487d4d0 ce443b3a
...@@ -1110,6 +1110,42 @@ local_size = (32, 16, 1) ...@@ -1110,6 +1110,42 @@ local_size = (32, 16, 1)
0:52 1 (const int) 0:52 1 (const int)
0:52 Constant: 0:52 Constant:
0:52 2 (const int) 0:52 2 (const int)
0:54 move second child to first child ( temp uint)
0:54 direct index ( temp uint)
0:54 u: direct index for structure ( temp 4-component vector of uint)
0:54 indirect index (layout( row_major std430) buffer structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:54 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:54 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d} @data})
0:54 Constant:
0:54 0 (const uint)
0:54 direct index ( temp uint)
0:54 'dti' ( in 3-component vector of uint)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 subgroupBallotInclusiveBitCount ( temp uint)
0:54 subgroupBallot ( temp 4-component vector of uint)
0:54 Compare Equal ( temp bool)
0:54 direct index ( temp uint)
0:54 u: direct index for structure ( temp 4-component vector of uint)
0:54 indirect index (layout( row_major std430) buffer structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:54 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:54 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d} @data})
0:54 Constant:
0:54 0 (const uint)
0:54 direct index ( temp uint)
0:54 'dti' ( in 3-component vector of uint)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const uint)
0:13 Function Definition: CSMain( ( temp void) 0:13 Function Definition: CSMain( ( temp void)
0:13 Function Parameters: 0:13 Function Parameters:
0:? Sequence 0:? Sequence
...@@ -2237,6 +2273,42 @@ local_size = (32, 16, 1) ...@@ -2237,6 +2273,42 @@ local_size = (32, 16, 1)
0:52 1 (const int) 0:52 1 (const int)
0:52 Constant: 0:52 Constant:
0:52 2 (const int) 0:52 2 (const int)
0:54 move second child to first child ( temp uint)
0:54 direct index ( temp uint)
0:54 u: direct index for structure ( temp 4-component vector of uint)
0:54 indirect index (layout( row_major std430) buffer structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:54 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:54 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d} @data})
0:54 Constant:
0:54 0 (const uint)
0:54 direct index ( temp uint)
0:54 'dti' ( in 3-component vector of uint)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 subgroupBallotInclusiveBitCount ( temp uint)
0:54 subgroupBallot ( temp 4-component vector of uint)
0:54 Compare Equal ( temp bool)
0:54 direct index ( temp uint)
0:54 u: direct index for structure ( temp 4-component vector of uint)
0:54 indirect index (layout( row_major std430) buffer structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:54 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:54 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d} @data})
0:54 Constant:
0:54 0 (const uint)
0:54 direct index ( temp uint)
0:54 'dti' ( in 3-component vector of uint)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const int)
0:54 Constant:
0:54 0 (const uint)
0:13 Function Definition: CSMain( ( temp void) 0:13 Function Definition: CSMain( ( temp void)
0:13 Function Parameters: 0:13 Function Parameters:
0:? Sequence 0:? Sequence
...@@ -2251,15 +2323,16 @@ local_size = (32, 16, 1) ...@@ -2251,15 +2323,16 @@ local_size = (32, 16, 1)
// Module Version 10000 // Module Version 10000
// Generated by (magic number): 80003 // Generated by (magic number): 80003
// Id's are bound by 358 // Id's are bound by 369
Capability Shader Capability Shader
Capability Float64 Capability Float64
Capability GroupNonUniform Capability GroupNonUniform
Capability GroupNonUniformArithmetic Capability GroupNonUniformArithmetic
Capability GroupNonUniformBallot
1: ExtInstImport "GLSL.std.450" 1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450 MemoryModel Logical GLSL450
EntryPoint GLCompute 4 "CSMain" 353 EntryPoint GLCompute 4 "CSMain" 364
ExecutionMode 4 LocalSize 32 16 1 ExecutionMode 4 LocalSize 32 16 1
Source HLSL 500 Source HLSL 500
Name 4 "CSMain" Name 4 "CSMain"
...@@ -2273,9 +2346,9 @@ local_size = (32, 16, 1) ...@@ -2273,9 +2346,9 @@ local_size = (32, 16, 1)
Name 22 "data" Name 22 "data"
MemberName 22(data) 0 "@data" MemberName 22(data) 0 "@data"
Name 24 "data" Name 24 "data"
Name 351 "dti" Name 362 "dti"
Name 353 "dti" Name 364 "dti"
Name 355 "param" Name 366 "param"
MemberDecorate 20(Types) 0 Offset 0 MemberDecorate 20(Types) 0 Offset 0
MemberDecorate 20(Types) 1 Offset 16 MemberDecorate 20(Types) 1 Offset 16
MemberDecorate 20(Types) 2 Offset 32 MemberDecorate 20(Types) 2 Offset 32
...@@ -2284,7 +2357,7 @@ local_size = (32, 16, 1) ...@@ -2284,7 +2357,7 @@ local_size = (32, 16, 1)
MemberDecorate 22(data) 0 Offset 0 MemberDecorate 22(data) 0 Offset 0
Decorate 22(data) BufferBlock Decorate 22(data) BufferBlock
Decorate 24(data) DescriptorSet 0 Decorate 24(data) DescriptorSet 0
Decorate 353(dti) BuiltIn GlobalInvocationId Decorate 364(dti) BuiltIn GlobalInvocationId
2: TypeVoid 2: TypeVoid
3: TypeFunction 2 3: TypeFunction 2
6: TypeInt 32 0 6: TypeInt 32 0
...@@ -2325,17 +2398,18 @@ local_size = (32, 16, 1) ...@@ -2325,17 +2398,18 @@ local_size = (32, 16, 1)
170: TypePointer Uniform 18(float) 170: TypePointer Uniform 18(float)
179: TypeVector 18(float) 2 179: TypeVector 18(float) 2
191: TypeVector 18(float) 3 191: TypeVector 18(float) 3
352: TypePointer Input 7(ivec3) 357: TypeBool
353(dti): 352(ptr) Variable Input 363: TypePointer Input 7(ivec3)
364(dti): 363(ptr) Variable Input
4(CSMain): 2 Function None 3 4(CSMain): 2 Function None 3
5: Label 5: Label
351(dti): 8(ptr) Variable Function 362(dti): 8(ptr) Variable Function
355(param): 8(ptr) Variable Function 366(param): 8(ptr) Variable Function
354: 7(ivec3) Load 353(dti) 365: 7(ivec3) Load 364(dti)
Store 351(dti) 354 Store 362(dti) 365
356: 7(ivec3) Load 351(dti) 367: 7(ivec3) Load 362(dti)
Store 355(param) 356 Store 366(param) 367
357: 2 FunctionCall 11(@CSMain(vu3;) 355(param) 368: 2 FunctionCall 11(@CSMain(vu3;) 366(param)
Return Return
FunctionEnd FunctionEnd
11(@CSMain(vu3;): 2 Function None 9 11(@CSMain(vu3;): 2 Function None 9
...@@ -2677,5 +2751,16 @@ local_size = (32, 16, 1) ...@@ -2677,5 +2751,16 @@ local_size = (32, 16, 1)
349: 19(fvec4) Load 348 349: 19(fvec4) Load 348
350: 19(fvec4) VectorShuffle 349 347 4 5 6 3 350: 19(fvec4) VectorShuffle 349 347 4 5 6 3
Store 348 350 Store 348 350
351: 27(ptr) AccessChain 10(dti) 26
352: 6(int) Load 351
353: 27(ptr) AccessChain 10(dti) 26
354: 6(int) Load 353
355: 42(ptr) AccessChain 24(data) 25 354 25 26
356: 6(int) Load 355
358: 357(bool) IEqual 356 26
359: 13(ivec4) GroupNonUniformBallot 35 358
360: 6(int) GroupNonUniformBallotBitCount 35 InclusiveScan 359
361: 42(ptr) AccessChain 24(data) 25 352 25 26
Store 361 360
Return Return
FunctionEnd FunctionEnd
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -7,7 +7,7 @@ gl_FragCoord origin is upper left ...@@ -7,7 +7,7 @@ gl_FragCoord origin is upper left
0:? Sequence 0:? Sequence
0:3 Test condition and select ( temp void) 0:3 Test condition and select ( temp void)
0:3 Condition 0:3 Condition
0:3 '@gl_HelperInvocation' ( in bool HelperInvocation) 0:3 subgroupElect ( temp bool)
0:3 true case 0:3 true case
0:? Sequence 0:? Sequence
0:5 Branch: Return with expression 0:5 Branch: Return with expression
...@@ -45,7 +45,7 @@ gl_FragCoord origin is upper left ...@@ -45,7 +45,7 @@ gl_FragCoord origin is upper left
0:? Sequence 0:? Sequence
0:3 Test condition and select ( temp void) 0:3 Test condition and select ( temp void)
0:3 Condition 0:3 Condition
0:3 '@gl_HelperInvocation' ( in bool HelperInvocation) 0:3 subgroupElect ( temp bool)
0:3 true case 0:3 true case
0:? Sequence 0:? Sequence
0:5 Branch: Return with expression 0:5 Branch: Return with expression
...@@ -76,16 +76,15 @@ gl_FragCoord origin is upper left ...@@ -76,16 +76,15 @@ gl_FragCoord origin is upper left
// Id's are bound by 30 // Id's are bound by 30
Capability Shader Capability Shader
Capability GroupNonUniform
1: ExtInstImport "GLSL.std.450" 1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450 MemoryModel Logical GLSL450
EntryPoint Fragment 4 "PixelShaderFunction" 13 28 EntryPoint Fragment 4 "PixelShaderFunction" 28
ExecutionMode 4 OriginUpperLeft ExecutionMode 4 OriginUpperLeft
Source HLSL 500 Source HLSL 500
Name 4 "PixelShaderFunction" Name 4 "PixelShaderFunction"
Name 9 "@PixelShaderFunction(" Name 9 "@PixelShaderFunction("
Name 13 "@gl_HelperInvocation"
Name 28 "@entryPointOutput" Name 28 "@entryPointOutput"
Decorate 13(@gl_HelperInvocation) BuiltIn HelperInvocation
Decorate 28(@entryPointOutput) Location 0 Decorate 28(@entryPointOutput) Location 0
2: TypeVoid 2: TypeVoid
3: TypeFunction 2 3: TypeFunction 2
...@@ -93,8 +92,8 @@ gl_FragCoord origin is upper left ...@@ -93,8 +92,8 @@ gl_FragCoord origin is upper left
7: TypeVector 6(float) 4 7: TypeVector 6(float) 4
8: TypeFunction 7(fvec4) 8: TypeFunction 7(fvec4)
11: TypeBool 11: TypeBool
12: TypePointer Input 11(bool) 12: TypeInt 32 0
13(@gl_HelperInvocation): 12(ptr) Variable Input 13: 12(int) Constant 3
17: 6(float) Constant 1065353216 17: 6(float) Constant 1065353216
18: 6(float) Constant 1073741824 18: 6(float) Constant 1073741824
19: 6(float) Constant 1077936128 19: 6(float) Constant 1077936128
...@@ -111,7 +110,7 @@ gl_FragCoord origin is upper left ...@@ -111,7 +110,7 @@ gl_FragCoord origin is upper left
FunctionEnd FunctionEnd
9(@PixelShaderFunction(): 7(fvec4) Function None 8 9(@PixelShaderFunction(): 7(fvec4) Function None 8
10: Label 10: Label
14: 11(bool) Load 13(@gl_HelperInvocation) 14: 11(bool) GroupNonUniformElect 13
SelectionMerge 16 None SelectionMerge 16 None
BranchConditional 14 15 23 BranchConditional 14 15 23
15: Label 15: Label
......
...@@ -3042,6 +3042,42 @@ local_size = (32, 16, 1) ...@@ -3042,6 +3042,42 @@ local_size = (32, 16, 1)
0:122 1 (const int) 0:122 1 (const int)
0:122 Constant: 0:122 Constant:
0:122 2 (const int) 0:122 2 (const int)
0:124 move second child to first child ( temp uint)
0:124 direct index ( temp uint)
0:124 u: direct index for structure ( temp 4-component vector of uint)
0:124 indirect index (layout( row_major std430) buffer structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:124 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:124 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d} @data})
0:124 Constant:
0:124 0 (const uint)
0:124 direct index ( temp uint)
0:124 'dti' ( in 3-component vector of uint)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const int)
0:124 subgroupBallotBitCount ( temp uint)
0:124 subgroupBallot ( temp 4-component vector of uint)
0:124 Compare Equal ( temp bool)
0:124 direct index ( temp uint)
0:124 u: direct index for structure ( temp 4-component vector of uint)
0:124 indirect index (layout( row_major std430) buffer structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:124 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:124 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d} @data})
0:124 Constant:
0:124 0 (const uint)
0:124 direct index ( temp uint)
0:124 'dti' ( in 3-component vector of uint)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const uint)
0:13 Function Definition: CSMain( ( temp void) 0:13 Function Definition: CSMain( ( temp void)
0:13 Function Parameters: 0:13 Function Parameters:
0:? Sequence 0:? Sequence
...@@ -6101,6 +6137,42 @@ local_size = (32, 16, 1) ...@@ -6101,6 +6137,42 @@ local_size = (32, 16, 1)
0:122 1 (const int) 0:122 1 (const int)
0:122 Constant: 0:122 Constant:
0:122 2 (const int) 0:122 2 (const int)
0:124 move second child to first child ( temp uint)
0:124 direct index ( temp uint)
0:124 u: direct index for structure ( temp 4-component vector of uint)
0:124 indirect index (layout( row_major std430) buffer structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:124 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:124 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d} @data})
0:124 Constant:
0:124 0 (const uint)
0:124 direct index ( temp uint)
0:124 'dti' ( in 3-component vector of uint)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const int)
0:124 subgroupBallotBitCount ( temp uint)
0:124 subgroupBallot ( temp 4-component vector of uint)
0:124 Compare Equal ( temp bool)
0:124 direct index ( temp uint)
0:124 u: direct index for structure ( temp 4-component vector of uint)
0:124 indirect index (layout( row_major std430) buffer structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:124 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d})
0:124 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of structure{ temp 4-component vector of uint u, temp 4-component vector of int i, temp 4-component vector of float f, temp 4-component vector of double d} @data})
0:124 Constant:
0:124 0 (const uint)
0:124 direct index ( temp uint)
0:124 'dti' ( in 3-component vector of uint)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const int)
0:124 Constant:
0:124 0 (const uint)
0:13 Function Definition: CSMain( ( temp void) 0:13 Function Definition: CSMain( ( temp void)
0:13 Function Parameters: 0:13 Function Parameters:
0:? Sequence 0:? Sequence
...@@ -6115,15 +6187,16 @@ local_size = (32, 16, 1) ...@@ -6115,15 +6187,16 @@ local_size = (32, 16, 1)
// Module Version 10000 // Module Version 10000
// Generated by (magic number): 80003 // Generated by (magic number): 80003
// Id's are bound by 890 // Id's are bound by 901
Capability Shader Capability Shader
Capability Float64 Capability Float64
Capability GroupNonUniform Capability GroupNonUniform
Capability GroupNonUniformArithmetic Capability GroupNonUniformArithmetic
Capability GroupNonUniformBallot
1: ExtInstImport "GLSL.std.450" 1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450 MemoryModel Logical GLSL450
EntryPoint GLCompute 4 "CSMain" 885 EntryPoint GLCompute 4 "CSMain" 896
ExecutionMode 4 LocalSize 32 16 1 ExecutionMode 4 LocalSize 32 16 1
Source HLSL 500 Source HLSL 500
Name 4 "CSMain" Name 4 "CSMain"
...@@ -6137,9 +6210,9 @@ local_size = (32, 16, 1) ...@@ -6137,9 +6210,9 @@ local_size = (32, 16, 1)
Name 22 "data" Name 22 "data"
MemberName 22(data) 0 "@data" MemberName 22(data) 0 "@data"
Name 24 "data" Name 24 "data"
Name 883 "dti" Name 894 "dti"
Name 885 "dti" Name 896 "dti"
Name 887 "param" Name 898 "param"
MemberDecorate 20(Types) 0 Offset 0 MemberDecorate 20(Types) 0 Offset 0
MemberDecorate 20(Types) 1 Offset 16 MemberDecorate 20(Types) 1 Offset 16
MemberDecorate 20(Types) 2 Offset 32 MemberDecorate 20(Types) 2 Offset 32
...@@ -6148,7 +6221,7 @@ local_size = (32, 16, 1) ...@@ -6148,7 +6221,7 @@ local_size = (32, 16, 1)
MemberDecorate 22(data) 0 Offset 0 MemberDecorate 22(data) 0 Offset 0
Decorate 22(data) BufferBlock Decorate 22(data) BufferBlock
Decorate 24(data) DescriptorSet 0 Decorate 24(data) DescriptorSet 0
Decorate 885(dti) BuiltIn GlobalInvocationId Decorate 896(dti) BuiltIn GlobalInvocationId
2: TypeVoid 2: TypeVoid
3: TypeFunction 2 3: TypeFunction 2
6: TypeInt 32 0 6: TypeInt 32 0
...@@ -6189,17 +6262,18 @@ local_size = (32, 16, 1) ...@@ -6189,17 +6262,18 @@ local_size = (32, 16, 1)
170: TypePointer Uniform 18(float) 170: TypePointer Uniform 18(float)
179: TypeVector 18(float) 2 179: TypeVector 18(float) 2
191: TypeVector 18(float) 3 191: TypeVector 18(float) 3
884: TypePointer Input 7(ivec3) 889: TypeBool
885(dti): 884(ptr) Variable Input 895: TypePointer Input 7(ivec3)
896(dti): 895(ptr) Variable Input
4(CSMain): 2 Function None 3 4(CSMain): 2 Function None 3
5: Label 5: Label
883(dti): 8(ptr) Variable Function 894(dti): 8(ptr) Variable Function
887(param): 8(ptr) Variable Function 898(param): 8(ptr) Variable Function
886: 7(ivec3) Load 885(dti) 897: 7(ivec3) Load 896(dti)
Store 883(dti) 886 Store 894(dti) 897
888: 7(ivec3) Load 883(dti) 899: 7(ivec3) Load 894(dti)
Store 887(param) 888 Store 898(param) 899
889: 2 FunctionCall 11(@CSMain(vu3;) 887(param) 900: 2 FunctionCall 11(@CSMain(vu3;) 898(param)
Return Return
FunctionEnd FunctionEnd
11(@CSMain(vu3;): 2 Function None 9 11(@CSMain(vu3;): 2 Function None 9
...@@ -7129,5 +7203,16 @@ local_size = (32, 16, 1) ...@@ -7129,5 +7203,16 @@ local_size = (32, 16, 1)
881: 15(ivec4) Load 880 881: 15(ivec4) Load 880
882: 15(ivec4) VectorShuffle 881 879 4 5 6 3 882: 15(ivec4) VectorShuffle 881 879 4 5 6 3
Store 880 882 Store 880 882
883: 27(ptr) AccessChain 10(dti) 26
884: 6(int) Load 883
885: 27(ptr) AccessChain 10(dti) 26
886: 6(int) Load 885
887: 42(ptr) AccessChain 24(data) 25 886 25 26
888: 6(int) Load 887
890: 889(bool) IEqual 888 26
891: 13(ivec4) GroupNonUniformBallot 35 890
892: 6(int) GroupNonUniformBallotBitCount 35 Reduce 891
893: 42(ptr) AccessChain 24(data) 25 884 25 26
Store 893 892
Return Return
FunctionEnd FunctionEnd
...@@ -16,8 +16,8 @@ local_size = (32, 16, 1) ...@@ -16,8 +16,8 @@ local_size = (32, 16, 1)
0:6 'dti' ( in 3-component vector of uint) 0:6 'dti' ( in 3-component vector of uint)
0:6 Constant: 0:6 Constant:
0:6 0 (const int) 0:6 0 (const int)
0:6 packUint2x32 ( temp uint64_t) 0:6 Construct uint64 (layout( row_major std430) buffer uint64_t)
0:6 vector swizzle ( temp 2-component vector of uint) 0:6 Convert uint to uint64 ( temp 4-component vector of uint64_t)
0:6 subgroupBallot ( temp 4-component vector of uint) 0:6 subgroupBallot ( temp 4-component vector of uint)
0:6 subgroupAny ( temp bool) 0:6 subgroupAny ( temp bool)
0:6 Compare Equal ( temp bool) 0:6 Compare Equal ( temp bool)
...@@ -27,11 +27,6 @@ local_size = (32, 16, 1) ...@@ -27,11 +27,6 @@ local_size = (32, 16, 1)
0:6 0 (const int) 0:6 0 (const int)
0:6 Constant: 0:6 Constant:
0:6 0 (const uint) 0:6 0 (const uint)
0:6 Sequence
0:6 Constant:
0:6 0 (const int)
0:6 Constant:
0:6 1 (const int)
0:7 move second child to first child ( temp uint64_t) 0:7 move second child to first child ( temp uint64_t)
0:7 indirect index (layout( row_major std430) buffer uint64_t) 0:7 indirect index (layout( row_major std430) buffer uint64_t)
0:7 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t) 0:7 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t)
...@@ -42,8 +37,8 @@ local_size = (32, 16, 1) ...@@ -42,8 +37,8 @@ local_size = (32, 16, 1)
0:7 'dti' ( in 3-component vector of uint) 0:7 'dti' ( in 3-component vector of uint)
0:7 Constant: 0:7 Constant:
0:7 1 (const int) 0:7 1 (const int)
0:7 packUint2x32 ( temp uint64_t) 0:7 Construct uint64 (layout( row_major std430) buffer uint64_t)
0:7 vector swizzle ( temp 2-component vector of uint) 0:7 Convert uint to uint64 ( temp 4-component vector of uint64_t)
0:7 subgroupBallot ( temp 4-component vector of uint) 0:7 subgroupBallot ( temp 4-component vector of uint)
0:7 subgroupAll ( temp bool) 0:7 subgroupAll ( temp bool)
0:7 Compare Equal ( temp bool) 0:7 Compare Equal ( temp bool)
...@@ -53,11 +48,6 @@ local_size = (32, 16, 1) ...@@ -53,11 +48,6 @@ local_size = (32, 16, 1)
0:7 1 (const int) 0:7 1 (const int)
0:7 Constant: 0:7 Constant:
0:7 0 (const uint) 0:7 0 (const uint)
0:7 Sequence
0:7 Constant:
0:7 0 (const int)
0:7 Constant:
0:7 1 (const int)
0:8 move second child to first child ( temp uint64_t) 0:8 move second child to first child ( temp uint64_t)
0:8 indirect index (layout( row_major std430) buffer uint64_t) 0:8 indirect index (layout( row_major std430) buffer uint64_t)
0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t) 0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t)
...@@ -68,8 +58,8 @@ local_size = (32, 16, 1) ...@@ -68,8 +58,8 @@ local_size = (32, 16, 1)
0:8 'dti' ( in 3-component vector of uint) 0:8 'dti' ( in 3-component vector of uint)
0:8 Constant: 0:8 Constant:
0:8 2 (const int) 0:8 2 (const int)
0:8 packUint2x32 ( temp uint64_t) 0:8 Construct uint64 (layout( row_major std430) buffer uint64_t)
0:8 vector swizzle ( temp 2-component vector of uint) 0:8 Convert uint to uint64 ( temp 4-component vector of uint64_t)
0:8 subgroupBallot ( temp 4-component vector of uint) 0:8 subgroupBallot ( temp 4-component vector of uint)
0:8 subgroupAllEqual ( temp bool) 0:8 subgroupAllEqual ( temp bool)
0:8 Compare Equal ( temp bool) 0:8 Compare Equal ( temp bool)
...@@ -79,11 +69,24 @@ local_size = (32, 16, 1) ...@@ -79,11 +69,24 @@ local_size = (32, 16, 1)
0:8 2 (const int) 0:8 2 (const int)
0:8 Constant: 0:8 Constant:
0:8 0 (const uint) 0:8 0 (const uint)
0:8 Sequence 0:9 move second child to first child ( temp uint64_t)
0:8 Constant: 0:9 indirect index (layout( row_major std430) buffer uint64_t)
0:8 0 (const int) 0:9 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t)
0:8 Constant: 0:9 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of uint64_t @data})
0:8 1 (const int) 0:9 Constant:
0:9 0 (const uint)
0:9 direct index ( temp uint)
0:9 'dti' ( in 3-component vector of uint)
0:9 Constant:
0:9 2 (const int)
0:9 Construct uint64 (layout( row_major std430) buffer uint64_t)
0:9 Convert uint to uint64 ( temp 4-component vector of uint64_t)
0:9 subgroupBallot ( temp 4-component vector of uint)
0:9 subgroupAllEqual ( temp bool)
0:9 direct index ( temp uint)
0:9 'dti' ( in 3-component vector of uint)
0:9 Constant:
0:9 2 (const int)
0:5 Function Definition: CSMain( ( temp void) 0:5 Function Definition: CSMain( ( temp void)
0:5 Function Parameters: 0:5 Function Parameters:
0:? Sequence 0:? Sequence
...@@ -117,8 +120,8 @@ local_size = (32, 16, 1) ...@@ -117,8 +120,8 @@ local_size = (32, 16, 1)
0:6 'dti' ( in 3-component vector of uint) 0:6 'dti' ( in 3-component vector of uint)
0:6 Constant: 0:6 Constant:
0:6 0 (const int) 0:6 0 (const int)
0:6 packUint2x32 ( temp uint64_t) 0:6 Construct uint64 (layout( row_major std430) buffer uint64_t)
0:6 vector swizzle ( temp 2-component vector of uint) 0:6 Convert uint to uint64 ( temp 4-component vector of uint64_t)
0:6 subgroupBallot ( temp 4-component vector of uint) 0:6 subgroupBallot ( temp 4-component vector of uint)
0:6 subgroupAny ( temp bool) 0:6 subgroupAny ( temp bool)
0:6 Compare Equal ( temp bool) 0:6 Compare Equal ( temp bool)
...@@ -128,11 +131,6 @@ local_size = (32, 16, 1) ...@@ -128,11 +131,6 @@ local_size = (32, 16, 1)
0:6 0 (const int) 0:6 0 (const int)
0:6 Constant: 0:6 Constant:
0:6 0 (const uint) 0:6 0 (const uint)
0:6 Sequence
0:6 Constant:
0:6 0 (const int)
0:6 Constant:
0:6 1 (const int)
0:7 move second child to first child ( temp uint64_t) 0:7 move second child to first child ( temp uint64_t)
0:7 indirect index (layout( row_major std430) buffer uint64_t) 0:7 indirect index (layout( row_major std430) buffer uint64_t)
0:7 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t) 0:7 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t)
...@@ -143,8 +141,8 @@ local_size = (32, 16, 1) ...@@ -143,8 +141,8 @@ local_size = (32, 16, 1)
0:7 'dti' ( in 3-component vector of uint) 0:7 'dti' ( in 3-component vector of uint)
0:7 Constant: 0:7 Constant:
0:7 1 (const int) 0:7 1 (const int)
0:7 packUint2x32 ( temp uint64_t) 0:7 Construct uint64 (layout( row_major std430) buffer uint64_t)
0:7 vector swizzle ( temp 2-component vector of uint) 0:7 Convert uint to uint64 ( temp 4-component vector of uint64_t)
0:7 subgroupBallot ( temp 4-component vector of uint) 0:7 subgroupBallot ( temp 4-component vector of uint)
0:7 subgroupAll ( temp bool) 0:7 subgroupAll ( temp bool)
0:7 Compare Equal ( temp bool) 0:7 Compare Equal ( temp bool)
...@@ -154,11 +152,6 @@ local_size = (32, 16, 1) ...@@ -154,11 +152,6 @@ local_size = (32, 16, 1)
0:7 1 (const int) 0:7 1 (const int)
0:7 Constant: 0:7 Constant:
0:7 0 (const uint) 0:7 0 (const uint)
0:7 Sequence
0:7 Constant:
0:7 0 (const int)
0:7 Constant:
0:7 1 (const int)
0:8 move second child to first child ( temp uint64_t) 0:8 move second child to first child ( temp uint64_t)
0:8 indirect index (layout( row_major std430) buffer uint64_t) 0:8 indirect index (layout( row_major std430) buffer uint64_t)
0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t) 0:8 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t)
...@@ -169,8 +162,8 @@ local_size = (32, 16, 1) ...@@ -169,8 +162,8 @@ local_size = (32, 16, 1)
0:8 'dti' ( in 3-component vector of uint) 0:8 'dti' ( in 3-component vector of uint)
0:8 Constant: 0:8 Constant:
0:8 2 (const int) 0:8 2 (const int)
0:8 packUint2x32 ( temp uint64_t) 0:8 Construct uint64 (layout( row_major std430) buffer uint64_t)
0:8 vector swizzle ( temp 2-component vector of uint) 0:8 Convert uint to uint64 ( temp 4-component vector of uint64_t)
0:8 subgroupBallot ( temp 4-component vector of uint) 0:8 subgroupBallot ( temp 4-component vector of uint)
0:8 subgroupAllEqual ( temp bool) 0:8 subgroupAllEqual ( temp bool)
0:8 Compare Equal ( temp bool) 0:8 Compare Equal ( temp bool)
...@@ -180,11 +173,24 @@ local_size = (32, 16, 1) ...@@ -180,11 +173,24 @@ local_size = (32, 16, 1)
0:8 2 (const int) 0:8 2 (const int)
0:8 Constant: 0:8 Constant:
0:8 0 (const uint) 0:8 0 (const uint)
0:8 Sequence 0:9 move second child to first child ( temp uint64_t)
0:8 Constant: 0:9 indirect index (layout( row_major std430) buffer uint64_t)
0:8 0 (const int) 0:9 @data: direct index for structure (layout( row_major std430) buffer implicitly-sized array of uint64_t)
0:8 Constant: 0:9 'data' (layout( row_major std430) buffer block{layout( row_major std430) buffer implicitly-sized array of uint64_t @data})
0:8 1 (const int) 0:9 Constant:
0:9 0 (const uint)
0:9 direct index ( temp uint)
0:9 'dti' ( in 3-component vector of uint)
0:9 Constant:
0:9 2 (const int)
0:9 Construct uint64 (layout( row_major std430) buffer uint64_t)
0:9 Convert uint to uint64 ( temp 4-component vector of uint64_t)
0:9 subgroupBallot ( temp 4-component vector of uint)
0:9 subgroupAllEqual ( temp bool)
0:9 direct index ( temp uint)
0:9 'dti' ( in 3-component vector of uint)
0:9 Constant:
0:9 2 (const int)
0:5 Function Definition: CSMain( ( temp void) 0:5 Function Definition: CSMain( ( temp void)
0:5 Function Parameters: 0:5 Function Parameters:
0:? Sequence 0:? Sequence
...@@ -199,7 +205,7 @@ local_size = (32, 16, 1) ...@@ -199,7 +205,7 @@ local_size = (32, 16, 1)
// Module Version 10000 // Module Version 10000
// Generated by (magic number): 80003 // Generated by (magic number): 80003
// Id's are bound by 66 // Id's are bound by 75
Capability Shader Capability Shader
Capability Int64 Capability Int64
...@@ -208,7 +214,7 @@ local_size = (32, 16, 1) ...@@ -208,7 +214,7 @@ local_size = (32, 16, 1)
Capability GroupNonUniformBallot Capability GroupNonUniformBallot
1: ExtInstImport "GLSL.std.450" 1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450 MemoryModel Logical GLSL450
EntryPoint GLCompute 4 "CSMain" 61 EntryPoint GLCompute 4 "CSMain" 70
ExecutionMode 4 LocalSize 32 16 1 ExecutionMode 4 LocalSize 32 16 1
Source HLSL 500 Source HLSL 500
Name 4 "CSMain" Name 4 "CSMain"
...@@ -217,14 +223,14 @@ local_size = (32, 16, 1) ...@@ -217,14 +223,14 @@ local_size = (32, 16, 1)
Name 15 "data" Name 15 "data"
MemberName 15(data) 0 "@data" MemberName 15(data) 0 "@data"
Name 17 "data" Name 17 "data"
Name 59 "dti" Name 68 "dti"
Name 61 "dti" Name 70 "dti"
Name 63 "param" Name 72 "param"
Decorate 14 ArrayStride 8 Decorate 14 ArrayStride 8
MemberDecorate 15(data) 0 Offset 0 MemberDecorate 15(data) 0 Offset 0
Decorate 15(data) BufferBlock Decorate 15(data) BufferBlock
Decorate 17(data) DescriptorSet 0 Decorate 17(data) DescriptorSet 0
Decorate 61(dti) BuiltIn GlobalInvocationId Decorate 70(dti) BuiltIn GlobalInvocationId
2: TypeVoid 2: TypeVoid
3: TypeFunction 2 3: TypeFunction 2
6: TypeInt 32 0 6: TypeInt 32 0
...@@ -243,21 +249,21 @@ local_size = (32, 16, 1) ...@@ -243,21 +249,21 @@ local_size = (32, 16, 1)
26: TypeBool 26: TypeBool
28: 6(int) Constant 3 28: 6(int) Constant 3
30: TypeVector 6(int) 4 30: TypeVector 6(int) 4
32: TypeVector 6(int) 2 32: TypeVector 13(int) 4
35: TypePointer Uniform 13(int) 35: TypePointer Uniform 13(int)
37: 6(int) Constant 1 37: 6(int) Constant 1
48: 6(int) Constant 2 48: 6(int) Constant 2
60: TypePointer Input 7(ivec3) 69: TypePointer Input 7(ivec3)
61(dti): 60(ptr) Variable Input 70(dti): 69(ptr) Variable Input
4(CSMain): 2 Function None 3 4(CSMain): 2 Function None 3
5: Label 5: Label
59(dti): 8(ptr) Variable Function 68(dti): 8(ptr) Variable Function
63(param): 8(ptr) Variable Function 72(param): 8(ptr) Variable Function
62: 7(ivec3) Load 61(dti) 71: 7(ivec3) Load 70(dti)
Store 59(dti) 62 Store 68(dti) 71
64: 7(ivec3) Load 59(dti) 73: 7(ivec3) Load 68(dti)
Store 63(param) 64 Store 72(param) 73
65: 2 FunctionCall 11(@CSMain(vu3;) 63(param) 74: 2 FunctionCall 11(@CSMain(vu3;) 72(param)
Return Return
FunctionEnd FunctionEnd
11(@CSMain(vu3;): 2 Function None 9 11(@CSMain(vu3;): 2 Function None 9
...@@ -270,8 +276,8 @@ local_size = (32, 16, 1) ...@@ -270,8 +276,8 @@ local_size = (32, 16, 1)
27: 26(bool) IEqual 25 20 27: 26(bool) IEqual 25 20
29: 26(bool) GroupNonUniformAny 28 27 29: 26(bool) GroupNonUniformAny 28 27
31: 30(ivec4) GroupNonUniformBallot 28 29 31: 30(ivec4) GroupNonUniformBallot 28 29
33: 32(ivec2) VectorShuffle 31 31 0 1 33: 32(ivec4) UConvert 31
34: 13(int) Bitcast 33 34: 13(int) CompositeExtract 33 0
36: 35(ptr) AccessChain 17(data) 19 23 36: 35(ptr) AccessChain 17(data) 19 23
Store 36 34 Store 36 34
38: 21(ptr) AccessChain 10(dti) 37 38: 21(ptr) AccessChain 10(dti) 37
...@@ -281,8 +287,8 @@ local_size = (32, 16, 1) ...@@ -281,8 +287,8 @@ local_size = (32, 16, 1)
42: 26(bool) IEqual 41 20 42: 26(bool) IEqual 41 20
43: 26(bool) GroupNonUniformAll 28 42 43: 26(bool) GroupNonUniformAll 28 42
44: 30(ivec4) GroupNonUniformBallot 28 43 44: 30(ivec4) GroupNonUniformBallot 28 43
45: 32(ivec2) VectorShuffle 44 44 0 1 45: 32(ivec4) UConvert 44
46: 13(int) Bitcast 45 46: 13(int) CompositeExtract 45 0
47: 35(ptr) AccessChain 17(data) 19 39 47: 35(ptr) AccessChain 17(data) 19 39
Store 47 46 Store 47 46
49: 21(ptr) AccessChain 10(dti) 48 49: 21(ptr) AccessChain 10(dti) 48
...@@ -292,9 +298,19 @@ local_size = (32, 16, 1) ...@@ -292,9 +298,19 @@ local_size = (32, 16, 1)
53: 26(bool) IEqual 52 20 53: 26(bool) IEqual 52 20
54: 26(bool) GroupNonUniformAllEqual 28 53 54: 26(bool) GroupNonUniformAllEqual 28 53
55: 30(ivec4) GroupNonUniformBallot 28 54 55: 30(ivec4) GroupNonUniformBallot 28 54
56: 32(ivec2) VectorShuffle 55 55 0 1 56: 32(ivec4) UConvert 55
57: 13(int) Bitcast 56 57: 13(int) CompositeExtract 56 0
58: 35(ptr) AccessChain 17(data) 19 50 58: 35(ptr) AccessChain 17(data) 19 50
Store 58 57 Store 58 57
59: 21(ptr) AccessChain 10(dti) 48
60: 6(int) Load 59
61: 21(ptr) AccessChain 10(dti) 48
62: 6(int) Load 61
63: 26(bool) GroupNonUniformAllEqual 28 62
64: 30(ivec4) GroupNonUniformBallot 28 63
65: 32(ivec4) UConvert 64
66: 13(int) CompositeExtract 65 0
67: 35(ptr) AccessChain 17(data) 19 60
Store 67 66
Return Return
FunctionEnd FunctionEnd
RWStructuredBuffer<uint> data;
[numthreads(32, 16, 1)]
void CSMain()
{
data[WaveGetOrderedIndex()] = 1;
}
float4 PixelShaderFunction() : COLOR0
{
if (0 == WaveGetOrderedIndex())
{
return float4(1, 2, 3, 4);
}
else
{
return float4(4, 3, 2, 1);
}
}
RWStructuredBuffer<uint> data;
[numthreads(32, 16, 1)]
void CSMain()
{
uint i = 42;
data[GlobalOrderedCountIncrement(i)] = 1;
}
float4 PixelShaderFunction() : COLOR0
{
uint i = 42;
if (0 == GlobalOrderedCountIncrement(i))
{
return float4(1, 2, 3, 4);
}
else
{
return float4(4, 3, 2, 1);
}
}
...@@ -50,4 +50,6 @@ void CSMain(uint3 dti : SV_DispatchThreadID) ...@@ -50,4 +50,6 @@ void CSMain(uint3 dti : SV_DispatchThreadID)
data[dti.x].d.x = WavePrefixProduct(data[dti.x].d.x); data[dti.x].d.x = WavePrefixProduct(data[dti.x].d.x);
data[dti.x].d.xy = WavePrefixProduct(data[dti.x].d.xy); data[dti.x].d.xy = WavePrefixProduct(data[dti.x].d.xy);
data[dti.x].d.xyz = WavePrefixProduct(data[dti.x].d.xyz); data[dti.x].d.xyz = WavePrefixProduct(data[dti.x].d.xyz);
data[dti.x].u.x = WavePrefixCountBits(data[dti.x].u.x == 0);
} }
...@@ -91,43 +91,63 @@ void CSMain(uint3 dti : SV_DispatchThreadID) ...@@ -91,43 +91,63 @@ void CSMain(uint3 dti : SV_DispatchThreadID)
data[dti.x].d.xy = QuadReadLaneAt(data[dti.x].d.xy, 3); data[dti.x].d.xy = QuadReadLaneAt(data[dti.x].d.xy, 3);
data[dti.x].d.xyz = QuadReadLaneAt(data[dti.x].d.xyz, 3); data[dti.x].d.xyz = QuadReadLaneAt(data[dti.x].d.xyz, 3);
data[dti.x].u = QuadSwapX(data[dti.x].u); data[dti.x].u = QuadReadAcrossX(data[dti.x].u);
data[dti.x].u.x = QuadSwapX(data[dti.x].u.x); data[dti.x].u.x = QuadReadAcrossX(data[dti.x].u.x);
data[dti.x].u.xy = QuadSwapX(data[dti.x].u.xy); data[dti.x].u.xy = QuadReadAcrossX(data[dti.x].u.xy);
data[dti.x].u.xyz = QuadSwapX(data[dti.x].u.xyz); data[dti.x].u.xyz = QuadReadAcrossX(data[dti.x].u.xyz);
data[dti.x].i = QuadSwapX(data[dti.x].i); data[dti.x].i = QuadReadAcrossX(data[dti.x].i);
data[dti.x].i.x = QuadSwapX(data[dti.x].i.x); data[dti.x].i.x = QuadReadAcrossX(data[dti.x].i.x);
data[dti.x].i.xy = QuadSwapX(data[dti.x].i.xy); data[dti.x].i.xy = QuadReadAcrossX(data[dti.x].i.xy);
data[dti.x].i.xyz = QuadSwapX(data[dti.x].i.xyz); data[dti.x].i.xyz = QuadReadAcrossX(data[dti.x].i.xyz);
data[dti.x].f = QuadSwapX(data[dti.x].f); data[dti.x].f = QuadReadAcrossX(data[dti.x].f);
data[dti.x].f.x = QuadSwapX(data[dti.x].f.x); data[dti.x].f.x = QuadReadAcrossX(data[dti.x].f.x);
data[dti.x].f.xy = QuadSwapX(data[dti.x].f.xy); data[dti.x].f.xy = QuadReadAcrossX(data[dti.x].f.xy);
data[dti.x].f.xyz = QuadSwapX(data[dti.x].f.xyz); data[dti.x].f.xyz = QuadReadAcrossX(data[dti.x].f.xyz);
data[dti.x].d = QuadSwapX(data[dti.x].d); data[dti.x].d = QuadReadAcrossX(data[dti.x].d);
data[dti.x].d.x = QuadSwapX(data[dti.x].d.x); data[dti.x].d.x = QuadReadAcrossX(data[dti.x].d.x);
data[dti.x].d.xy = QuadSwapX(data[dti.x].d.xy); data[dti.x].d.xy = QuadReadAcrossX(data[dti.x].d.xy);
data[dti.x].d.xyz = QuadSwapX(data[dti.x].d.xyz); data[dti.x].d.xyz = QuadReadAcrossX(data[dti.x].d.xyz);
data[dti.x].u = QuadSwapY(data[dti.x].u); data[dti.x].u = QuadReadAcrossY(data[dti.x].u);
data[dti.x].u.x = QuadSwapY(data[dti.x].u.x); data[dti.x].u.x = QuadReadAcrossY(data[dti.x].u.x);
data[dti.x].u.xy = QuadSwapY(data[dti.x].u.xy); data[dti.x].u.xy = QuadReadAcrossY(data[dti.x].u.xy);
data[dti.x].u.xyz = QuadSwapY(data[dti.x].u.xyz); data[dti.x].u.xyz = QuadReadAcrossY(data[dti.x].u.xyz);
data[dti.x].i = QuadSwapY(data[dti.x].i); data[dti.x].i = QuadReadAcrossY(data[dti.x].i);
data[dti.x].i.x = QuadSwapY(data[dti.x].i.x); data[dti.x].i.x = QuadReadAcrossY(data[dti.x].i.x);
data[dti.x].i.xy = QuadSwapY(data[dti.x].i.xy); data[dti.x].i.xy = QuadReadAcrossY(data[dti.x].i.xy);
data[dti.x].i.xyz = QuadSwapY(data[dti.x].i.xyz); data[dti.x].i.xyz = QuadReadAcrossY(data[dti.x].i.xyz);
data[dti.x].f = QuadSwapY(data[dti.x].f); data[dti.x].f = QuadReadAcrossY(data[dti.x].f);
data[dti.x].f.x = QuadSwapY(data[dti.x].f.x); data[dti.x].f.x = QuadReadAcrossY(data[dti.x].f.x);
data[dti.x].f.xy = QuadSwapY(data[dti.x].f.xy); data[dti.x].f.xy = QuadReadAcrossY(data[dti.x].f.xy);
data[dti.x].f.xyz = QuadSwapY(data[dti.x].f.xyz); data[dti.x].f.xyz = QuadReadAcrossY(data[dti.x].f.xyz);
data[dti.x].d = QuadSwapY(data[dti.x].d); data[dti.x].d = QuadReadAcrossY(data[dti.x].d);
data[dti.x].d.x = QuadSwapY(data[dti.x].d.x); data[dti.x].d.x = QuadReadAcrossY(data[dti.x].d.x);
data[dti.x].d.xy = QuadSwapY(data[dti.x].d.xy); data[dti.x].d.xy = QuadReadAcrossY(data[dti.x].d.xy);
data[dti.x].d.xyz = QuadSwapY(data[dti.x].d.xyz); data[dti.x].d.xyz = QuadReadAcrossY(data[dti.x].d.xyz);
data[dti.x].u = QuadReadAcrossDiagonal(data[dti.x].u);
data[dti.x].u.x = QuadReadAcrossDiagonal(data[dti.x].u.x);
data[dti.x].u.xy = QuadReadAcrossDiagonal(data[dti.x].u.xy);
data[dti.x].u.xyz = QuadReadAcrossDiagonal(data[dti.x].u.xyz);
data[dti.x].i = QuadReadAcrossDiagonal(data[dti.x].i);
data[dti.x].i.x = QuadReadAcrossDiagonal(data[dti.x].i.x);
data[dti.x].i.xy = QuadReadAcrossDiagonal(data[dti.x].i.xy);
data[dti.x].i.xyz = QuadReadAcrossDiagonal(data[dti.x].i.xyz);
data[dti.x].f = QuadReadAcrossDiagonal(data[dti.x].f);
data[dti.x].f.x = QuadReadAcrossDiagonal(data[dti.x].f.x);
data[dti.x].f.xy = QuadReadAcrossDiagonal(data[dti.x].f.xy);
data[dti.x].f.xyz = QuadReadAcrossDiagonal(data[dti.x].f.xyz);
data[dti.x].d = QuadReadAcrossDiagonal(data[dti.x].d);
data[dti.x].d.x = QuadReadAcrossDiagonal(data[dti.x].d.x);
data[dti.x].d.xy = QuadReadAcrossDiagonal(data[dti.x].d.xy);
data[dti.x].d.xyz = QuadReadAcrossDiagonal(data[dti.x].d.xyz);
} }
...@@ -3,5 +3,5 @@ RWStructuredBuffer<uint> data; ...@@ -3,5 +3,5 @@ RWStructuredBuffer<uint> data;
[numthreads(32, 16, 1)] [numthreads(32, 16, 1)]
void CSMain() void CSMain()
{ {
data[WaveGetLaneIndex()] = (WaveOnce()) ? WaveGetLaneCount() : 0; data[WaveGetLaneIndex()] = (WaveIsFirstLane()) ? WaveGetLaneCount() : 0;
} }
float4 PixelShaderFunction() : COLOR0 float4 PixelShaderFunction() : COLOR0
{ {
if (WaveIsHelperLane()) if (WaveIsFirstLane())
{ {
return float4(1, 2, 3, 4); return float4(1, 2, 3, 4);
} }
......
...@@ -11,113 +11,115 @@ RWStructuredBuffer<Types> data; ...@@ -11,113 +11,115 @@ RWStructuredBuffer<Types> data;
[numthreads(32, 16, 1)] [numthreads(32, 16, 1)]
void CSMain(uint3 dti : SV_DispatchThreadID) void CSMain(uint3 dti : SV_DispatchThreadID)
{ {
data[dti.x].u = WaveAllSum(data[dti.x].u); data[dti.x].u = WaveActiveSum(data[dti.x].u);
data[dti.x].u.x = WaveAllSum(data[dti.x].u.x); data[dti.x].u.x = WaveActiveSum(data[dti.x].u.x);
data[dti.x].u.xy = WaveAllSum(data[dti.x].u.xy); data[dti.x].u.xy = WaveActiveSum(data[dti.x].u.xy);
data[dti.x].u.xyz = WaveAllSum(data[dti.x].u.xyz); data[dti.x].u.xyz = WaveActiveSum(data[dti.x].u.xyz);
data[dti.x].i = WaveAllSum(data[dti.x].i); data[dti.x].i = WaveActiveSum(data[dti.x].i);
data[dti.x].i.x = WaveAllSum(data[dti.x].i.x); data[dti.x].i.x = WaveActiveSum(data[dti.x].i.x);
data[dti.x].i.xy = WaveAllSum(data[dti.x].i.xy); data[dti.x].i.xy = WaveActiveSum(data[dti.x].i.xy);
data[dti.x].i.xyz = WaveAllSum(data[dti.x].i.xyz); data[dti.x].i.xyz = WaveActiveSum(data[dti.x].i.xyz);
data[dti.x].f = WaveAllSum(data[dti.x].f); data[dti.x].f = WaveActiveSum(data[dti.x].f);
data[dti.x].f.x = WaveAllSum(data[dti.x].f.x); data[dti.x].f.x = WaveActiveSum(data[dti.x].f.x);
data[dti.x].f.xy = WaveAllSum(data[dti.x].f.xy); data[dti.x].f.xy = WaveActiveSum(data[dti.x].f.xy);
data[dti.x].f.xyz = WaveAllSum(data[dti.x].f.xyz); data[dti.x].f.xyz = WaveActiveSum(data[dti.x].f.xyz);
data[dti.x].d = WaveAllSum(data[dti.x].d); data[dti.x].d = WaveActiveSum(data[dti.x].d);
data[dti.x].d.x = WaveAllSum(data[dti.x].d.x); data[dti.x].d.x = WaveActiveSum(data[dti.x].d.x);
data[dti.x].d.xy = WaveAllSum(data[dti.x].d.xy); data[dti.x].d.xy = WaveActiveSum(data[dti.x].d.xy);
data[dti.x].d.xyz = WaveAllSum(data[dti.x].d.xyz); data[dti.x].d.xyz = WaveActiveSum(data[dti.x].d.xyz);
data[dti.x].u = WaveAllProduct(data[dti.x].u); data[dti.x].u = WaveActiveProduct(data[dti.x].u);
data[dti.x].u.x = WaveAllProduct(data[dti.x].u.x); data[dti.x].u.x = WaveActiveProduct(data[dti.x].u.x);
data[dti.x].u.xy = WaveAllProduct(data[dti.x].u.xy); data[dti.x].u.xy = WaveActiveProduct(data[dti.x].u.xy);
data[dti.x].u.xyz = WaveAllProduct(data[dti.x].u.xyz); data[dti.x].u.xyz = WaveActiveProduct(data[dti.x].u.xyz);
data[dti.x].i = WaveAllProduct(data[dti.x].i); data[dti.x].i = WaveActiveProduct(data[dti.x].i);
data[dti.x].i.x = WaveAllProduct(data[dti.x].i.x); data[dti.x].i.x = WaveActiveProduct(data[dti.x].i.x);
data[dti.x].i.xy = WaveAllProduct(data[dti.x].i.xy); data[dti.x].i.xy = WaveActiveProduct(data[dti.x].i.xy);
data[dti.x].i.xyz = WaveAllProduct(data[dti.x].i.xyz); data[dti.x].i.xyz = WaveActiveProduct(data[dti.x].i.xyz);
data[dti.x].f = WaveAllProduct(data[dti.x].f); data[dti.x].f = WaveActiveProduct(data[dti.x].f);
data[dti.x].f.x = WaveAllProduct(data[dti.x].f.x); data[dti.x].f.x = WaveActiveProduct(data[dti.x].f.x);
data[dti.x].f.xy = WaveAllProduct(data[dti.x].f.xy); data[dti.x].f.xy = WaveActiveProduct(data[dti.x].f.xy);
data[dti.x].f.xyz = WaveAllProduct(data[dti.x].f.xyz); data[dti.x].f.xyz = WaveActiveProduct(data[dti.x].f.xyz);
data[dti.x].d = WaveAllProduct(data[dti.x].d); data[dti.x].d = WaveActiveProduct(data[dti.x].d);
data[dti.x].d.x = WaveAllProduct(data[dti.x].d.x); data[dti.x].d.x = WaveActiveProduct(data[dti.x].d.x);
data[dti.x].d.xy = WaveAllProduct(data[dti.x].d.xy); data[dti.x].d.xy = WaveActiveProduct(data[dti.x].d.xy);
data[dti.x].d.xyz = WaveAllProduct(data[dti.x].d.xyz); data[dti.x].d.xyz = WaveActiveProduct(data[dti.x].d.xyz);
data[dti.x].u = WaveAllMin(data[dti.x].u); data[dti.x].u = WaveActiveMin(data[dti.x].u);
data[dti.x].u.x = WaveAllMin(data[dti.x].u.x); data[dti.x].u.x = WaveActiveMin(data[dti.x].u.x);
data[dti.x].u.xy = WaveAllMin(data[dti.x].u.xy); data[dti.x].u.xy = WaveActiveMin(data[dti.x].u.xy);
data[dti.x].u.xyz = WaveAllMin(data[dti.x].u.xyz); data[dti.x].u.xyz = WaveActiveMin(data[dti.x].u.xyz);
data[dti.x].i = WaveAllMin(data[dti.x].i); data[dti.x].i = WaveActiveMin(data[dti.x].i);
data[dti.x].i.x = WaveAllMin(data[dti.x].i.x); data[dti.x].i.x = WaveActiveMin(data[dti.x].i.x);
data[dti.x].i.xy = WaveAllMin(data[dti.x].i.xy); data[dti.x].i.xy = WaveActiveMin(data[dti.x].i.xy);
data[dti.x].i.xyz = WaveAllMin(data[dti.x].i.xyz); data[dti.x].i.xyz = WaveActiveMin(data[dti.x].i.xyz);
data[dti.x].f = WaveAllMin(data[dti.x].f); data[dti.x].f = WaveActiveMin(data[dti.x].f);
data[dti.x].f.x = WaveAllMin(data[dti.x].f.x); data[dti.x].f.x = WaveActiveMin(data[dti.x].f.x);
data[dti.x].f.xy = WaveAllMin(data[dti.x].f.xy); data[dti.x].f.xy = WaveActiveMin(data[dti.x].f.xy);
data[dti.x].f.xyz = WaveAllMin(data[dti.x].f.xyz); data[dti.x].f.xyz = WaveActiveMin(data[dti.x].f.xyz);
data[dti.x].d = WaveAllMin(data[dti.x].d); data[dti.x].d = WaveActiveMin(data[dti.x].d);
data[dti.x].d.x = WaveAllMin(data[dti.x].d.x); data[dti.x].d.x = WaveActiveMin(data[dti.x].d.x);
data[dti.x].d.xy = WaveAllMin(data[dti.x].d.xy); data[dti.x].d.xy = WaveActiveMin(data[dti.x].d.xy);
data[dti.x].d.xyz = WaveAllMin(data[dti.x].d.xyz); data[dti.x].d.xyz = WaveActiveMin(data[dti.x].d.xyz);
data[dti.x].u = WaveAllMax(data[dti.x].u); data[dti.x].u = WaveActiveMax(data[dti.x].u);
data[dti.x].u.x = WaveAllMax(data[dti.x].u.x); data[dti.x].u.x = WaveActiveMax(data[dti.x].u.x);
data[dti.x].u.xy = WaveAllMax(data[dti.x].u.xy); data[dti.x].u.xy = WaveActiveMax(data[dti.x].u.xy);
data[dti.x].u.xyz = WaveAllMax(data[dti.x].u.xyz); data[dti.x].u.xyz = WaveActiveMax(data[dti.x].u.xyz);
data[dti.x].i = WaveAllMax(data[dti.x].i); data[dti.x].i = WaveActiveMax(data[dti.x].i);
data[dti.x].i.x = WaveAllMax(data[dti.x].i.x); data[dti.x].i.x = WaveActiveMax(data[dti.x].i.x);
data[dti.x].i.xy = WaveAllMax(data[dti.x].i.xy); data[dti.x].i.xy = WaveActiveMax(data[dti.x].i.xy);
data[dti.x].i.xyz = WaveAllMax(data[dti.x].i.xyz); data[dti.x].i.xyz = WaveActiveMax(data[dti.x].i.xyz);
data[dti.x].f = WaveAllMax(data[dti.x].f); data[dti.x].f = WaveActiveMax(data[dti.x].f);
data[dti.x].f.x = WaveAllMax(data[dti.x].f.x); data[dti.x].f.x = WaveActiveMax(data[dti.x].f.x);
data[dti.x].f.xy = WaveAllMax(data[dti.x].f.xy); data[dti.x].f.xy = WaveActiveMax(data[dti.x].f.xy);
data[dti.x].f.xyz = WaveAllMax(data[dti.x].f.xyz); data[dti.x].f.xyz = WaveActiveMax(data[dti.x].f.xyz);
data[dti.x].d = WaveAllMax(data[dti.x].d); data[dti.x].d = WaveActiveMax(data[dti.x].d);
data[dti.x].d.x = WaveAllMax(data[dti.x].d.x); data[dti.x].d.x = WaveActiveMax(data[dti.x].d.x);
data[dti.x].d.xy = WaveAllMax(data[dti.x].d.xy); data[dti.x].d.xy = WaveActiveMax(data[dti.x].d.xy);
data[dti.x].d.xyz = WaveAllMax(data[dti.x].d.xyz); data[dti.x].d.xyz = WaveActiveMax(data[dti.x].d.xyz);
data[dti.x].u = WaveAllBitAnd(data[dti.x].u); data[dti.x].u = WaveActiveBitAnd(data[dti.x].u);
data[dti.x].u.x = WaveAllBitAnd(data[dti.x].u.x); data[dti.x].u.x = WaveActiveBitAnd(data[dti.x].u.x);
data[dti.x].u.xy = WaveAllBitAnd(data[dti.x].u.xy); data[dti.x].u.xy = WaveActiveBitAnd(data[dti.x].u.xy);
data[dti.x].u.xyz = WaveAllBitAnd(data[dti.x].u.xyz); data[dti.x].u.xyz = WaveActiveBitAnd(data[dti.x].u.xyz);
data[dti.x].i = WaveAllBitAnd(data[dti.x].i); data[dti.x].i = WaveActiveBitAnd(data[dti.x].i);
data[dti.x].i.x = WaveAllBitAnd(data[dti.x].i.x); data[dti.x].i.x = WaveActiveBitAnd(data[dti.x].i.x);
data[dti.x].i.xy = WaveAllBitAnd(data[dti.x].i.xy); data[dti.x].i.xy = WaveActiveBitAnd(data[dti.x].i.xy);
data[dti.x].i.xyz = WaveAllBitAnd(data[dti.x].i.xyz); data[dti.x].i.xyz = WaveActiveBitAnd(data[dti.x].i.xyz);
data[dti.x].u = WaveAllBitOr(data[dti.x].u); data[dti.x].u = WaveActiveBitOr(data[dti.x].u);
data[dti.x].u.x = WaveAllBitOr(data[dti.x].u.x); data[dti.x].u.x = WaveActiveBitOr(data[dti.x].u.x);
data[dti.x].u.xy = WaveAllBitOr(data[dti.x].u.xy); data[dti.x].u.xy = WaveActiveBitOr(data[dti.x].u.xy);
data[dti.x].u.xyz = WaveAllBitOr(data[dti.x].u.xyz); data[dti.x].u.xyz = WaveActiveBitOr(data[dti.x].u.xyz);
data[dti.x].i = WaveAllBitOr(data[dti.x].i); data[dti.x].i = WaveActiveBitOr(data[dti.x].i);
data[dti.x].i.x = WaveAllBitOr(data[dti.x].i.x); data[dti.x].i.x = WaveActiveBitOr(data[dti.x].i.x);
data[dti.x].i.xy = WaveAllBitOr(data[dti.x].i.xy); data[dti.x].i.xy = WaveActiveBitOr(data[dti.x].i.xy);
data[dti.x].i.xyz = WaveAllBitOr(data[dti.x].i.xyz); data[dti.x].i.xyz = WaveActiveBitOr(data[dti.x].i.xyz);
data[dti.x].u = WaveAllBitXor(data[dti.x].u); data[dti.x].u = WaveActiveBitXor(data[dti.x].u);
data[dti.x].u.x = WaveAllBitXor(data[dti.x].u.x); data[dti.x].u.x = WaveActiveBitXor(data[dti.x].u.x);
data[dti.x].u.xy = WaveAllBitXor(data[dti.x].u.xy); data[dti.x].u.xy = WaveActiveBitXor(data[dti.x].u.xy);
data[dti.x].u.xyz = WaveAllBitXor(data[dti.x].u.xyz); data[dti.x].u.xyz = WaveActiveBitXor(data[dti.x].u.xyz);
data[dti.x].i = WaveAllBitXor(data[dti.x].i); data[dti.x].i = WaveActiveBitXor(data[dti.x].i);
data[dti.x].i.x = WaveAllBitXor(data[dti.x].i.x); data[dti.x].i.x = WaveActiveBitXor(data[dti.x].i.x);
data[dti.x].i.xy = WaveAllBitXor(data[dti.x].i.xy); data[dti.x].i.xy = WaveActiveBitXor(data[dti.x].i.xy);
data[dti.x].i.xyz = WaveAllBitXor(data[dti.x].i.xyz); data[dti.x].i.xyz = WaveActiveBitXor(data[dti.x].i.xyz);
data[dti.x].u.x = WaveActiveCountBits(data[dti.x].u.x == 0);
} }
...@@ -3,7 +3,8 @@ RWStructuredBuffer<uint64_t> data; ...@@ -3,7 +3,8 @@ RWStructuredBuffer<uint64_t> data;
[numthreads(32, 16, 1)] [numthreads(32, 16, 1)]
void CSMain(uint3 dti : SV_DispatchThreadID) void CSMain(uint3 dti : SV_DispatchThreadID)
{ {
data[dti.x] = WaveBallot(WaveAnyTrue(dti.x == 0)); data[dti.x] = WaveActiveBallot(WaveActiveAnyTrue(dti.x == 0));
data[dti.y] = WaveBallot(WaveAllTrue(dti.y == 0)); data[dti.y] = WaveActiveBallot(WaveActiveAllTrue(dti.y == 0));
data[dti.z] = WaveBallot(WaveAllEqual(dti.z == 0)); data[dti.z] = WaveActiveBallot(WaveActiveAllEqualBool(dti.z == 0));
data[dti.z] = WaveActiveBallot(WaveActiveAllEqual(dti.z));
} }
...@@ -927,10 +927,8 @@ enum TOperator { ...@@ -927,10 +927,8 @@ enum TOperator {
// SM6 wave ops // SM6 wave ops
EOpWaveGetLaneCount, // Will decompose to gl_SubgroupSize. EOpWaveGetLaneCount, // Will decompose to gl_SubgroupSize.
EOpWaveGetLaneIndex, // Will decompose to gl_SubgroupInvocationID. EOpWaveGetLaneIndex, // Will decompose to gl_SubgroupInvocationID.
EOpWaveIsHelperLane, // Will decompose to gl_HelperInvocation. EOpWaveActiveCountBits, // Will decompose to subgroupBallotBitCount(subgroupBallot()).
EOpWaveBallot, // Will decompose to subgroupBallot. EOpWavePrefixCountBits, // Will decompose to subgroupBallotInclusiveBitCount(subgroupBallot()).
EOpWaveGetOrderedIndex, // Will decompose to an equation containing gl_SubgroupID.
EOpGlobalOrderedCountIncrement, // Will nice error.
}; };
class TIntermTraverser; class TIntermTraverser;
......
...@@ -367,17 +367,13 @@ INSTANTIATE_TEST_CASE_P( ...@@ -367,17 +367,13 @@ INSTANTIATE_TEST_CASE_P(
{"hlsl.type.identifier.frag", "main"}, {"hlsl.type.identifier.frag", "main"},
{"hlsl.typeGraphCopy.vert", "main"}, {"hlsl.typeGraphCopy.vert", "main"},
{"hlsl.typedef.frag", "PixelShaderFunction"}, {"hlsl.typedef.frag", "PixelShaderFunction"},
{"hlsl.wavequery.comp", "CSMain"},
{"hlsl.wavequery.frag", "PixelShaderFunction"},
{"hlsl.wavevote.comp", "CSMain"},
{"hlsl.wavebroadcast.comp", "CSMain"}, {"hlsl.wavebroadcast.comp", "CSMain"},
{"hlsl.wavereduction.comp", "CSMain"},
{"hlsl.waveprefix.comp", "CSMain"}, {"hlsl.waveprefix.comp", "CSMain"},
{"hlsl.wavequad.comp", "CSMain"}, {"hlsl.wavequad.comp", "CSMain"},
{"hlsl.waveordered.comp", "CSMain"}, {"hlsl.wavequery.comp", "CSMain"},
{"hlsl.waveordered2.comp", "CSMain"}, {"hlsl.wavequery.frag", "PixelShaderFunction"},
{"hlsl.waveordered.frag", "PixelShaderFunction"}, {"hlsl.wavereduction.comp", "CSMain"},
{"hlsl.waveordered2.frag", "PixelShaderFunction"}, {"hlsl.wavevote.comp", "CSMain"},
{"hlsl.whileLoop.frag", "PixelShaderFunction"}, {"hlsl.whileLoop.frag", "PixelShaderFunction"},
{"hlsl.void.frag", "PixelShaderFunction"} {"hlsl.void.frag", "PixelShaderFunction"}
}), }),
......
...@@ -5090,19 +5090,9 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& ...@@ -5090,19 +5090,9 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
node = lookupBuiltinVariable("@gl_SubgroupInvocationID", EbvSubgroupInvocation2, type); node = lookupBuiltinVariable("@gl_SubgroupInvocationID", EbvSubgroupInvocation2, type);
break; break;
} }
case EOpWaveIsHelperLane: case EOpWaveActiveCountBits:
{ {
// Mapped to gl_HelperInvocation builtin (We preprend @ to the symbol // Mapped to subgroupBallotBitCount(subgroupBallot()) builtin
// so that it inhabits the symbol table, but has a user-invalid name
// in-case some source HLSL defined the symbol also).
TType type(EbtBool, EvqVaryingIn);
node = lookupBuiltinVariable("@gl_HelperInvocation", EbvHelperInvocation, type);
break;
}
case EOpWaveBallot:
{
// Mapped to subgroupBallot() builtin (NOTE: if an IHV has
// a subgroup size > 64 these wave ops will not work for them!)
// uvec4 type. // uvec4 type.
TType uvec4Type(EbtUint, EvqTemporary, 4); TType uvec4Type(EbtUint, EvqTemporary, 4);
...@@ -5111,63 +5101,34 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& ...@@ -5111,63 +5101,34 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
TIntermTyped* res = intermediate.addBuiltInFunctionCall(loc, TIntermTyped* res = intermediate.addBuiltInFunctionCall(loc,
EOpSubgroupBallot, true, arguments, uvec4Type); EOpSubgroupBallot, true, arguments, uvec4Type);
// And extract a uvec2 for the two highest components. // uint type.
TIntermTyped* xy = handleDotDereference(loc, res, "xy"); TType uintType(EbtUint, EvqTemporary);
// uint64_t type.
TType uint64Type(EbtUint64, EvqTemporary);
// And bitcast the result for a uint64_t
node = intermediate.addBuiltInFunctionCall(loc, node = intermediate.addBuiltInFunctionCall(loc,
EOpPackUint2x32, true, xy, uint64Type); EOpSubgroupBallotBitCount, true, res, uintType);
break; break;
} }
case EOpWaveGetOrderedIndex: case EOpWavePrefixCountBits:
{ {
if (language == EShLangFragment) { // Mapped to subgroupBallotInclusiveBitCount(subgroupBallot())
// NOTE: For HLSL SM6.0 this should work for PS too, but the current GLSL extensions don't allow this. // builtin
error(loc, "WaveGetOrderedIndex() unsupported in a pixel/fragment shader", "WaveGetOrderedIndex", "");
break;
}
TType uintType(EbtUint, EvqVaryingIn);
TIntermTyped* subgroupID = lookupBuiltinVariable("@gl_SubgroupID", EbvSubgroupID, uintType);
TIntermTyped* numSubgroups = lookupBuiltinVariable("@gl_NumSubgroups", EbvNumSubgroups, uintType);
TType uvec3Type(EbtUint, EvqVaryingIn, 3); // uvec4 type.
TIntermTyped* numWorkGroups = lookupBuiltinVariable("@gl_NumWorkGroups", EbvNumWorkGroups, uvec3Type); TType uvec4Type(EbtUint, EvqTemporary, 4);
TIntermTyped* workGroupID = lookupBuiltinVariable("@gl_WorkGroupID", EbvWorkGroupId, uvec3Type);
//x & y components of gl_NumWorkGroups
TIntermTyped* numWorkGroupsX = handleDotDereference(loc, numWorkGroups, "x");
TIntermTyped* numWorkGroupsY = handleDotDereference(loc, numWorkGroups, "y");
// x & y components of globalSize // Get the uvec4 return from subgroupBallot().
TIntermTyped* globalSizeX = handleBinaryMath(loc, "mul", EOpMul, numSubgroups, numWorkGroupsX); TIntermTyped* res = intermediate.addBuiltInFunctionCall(loc,
TIntermTyped* globalSizeY = numWorkGroupsY; EOpSubgroupBallot, true, arguments, uvec4Type);
// x, y & z components of gl_WorkGroupID // uint type.
TIntermTyped* workGroupX = handleDotDereference(loc, workGroupID, "x"); TType uintType(EbtUint, EvqTemporary);
TIntermTyped* workGroupY = handleDotDereference(loc, workGroupID, "y");
TIntermTyped* workGroupZ = handleDotDereference(loc, workGroupID, "z");
// We're going to build up the following variables to get a uniquely ordered ID: node = intermediate.addBuiltInFunctionCall(loc,
// (globalSize.y * gl_WorkGroupID.z + gl_WorkGroupID.y) * globalSize.x + gl_WorkGroupID.x + gl_SubgroupID EOpSubgroupBallotInclusiveBitCount, true, res, uintType);
node = handleBinaryMath(loc, "mul", EOpMul, globalSizeY, workGroupZ);
node = handleBinaryMath(loc, "add", EOpAdd, node, workGroupY);
node = handleBinaryMath(loc, "mul", EOpMul, node, globalSizeX);
node = handleBinaryMath(loc, "add", EOpAdd, node, workGroupX);
node = handleBinaryMath(loc, "add", EOpAdd, node, subgroupID);
break; break;
} }
case EOpGlobalOrderedCountIncrement:
{
// NOTE: For HLSL SM6.0 this should work, but the current GLSL extensions don't allow this.
error(loc, "GlobalOrderedCountIncrement() unsupported", "GlobalOrderedCountIncrement", "");
break;
}
default: default:
break; // most pass through unchanged break; // most pass through unchanged
......
...@@ -905,30 +905,33 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c ...@@ -905,30 +905,33 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c
{ "Consume", nullptr, nullptr, "-", "-", EShLangAll, true }, { "Consume", nullptr, nullptr, "-", "-", EShLangAll, true },
// SM 6.0 // SM 6.0
{ "WaveOnce", "S", "B", "-", "-", EShLangPSCS, false},
{ "WaveIsFirstLane", "S", "B", "-", "-", EShLangPSCS, false},
{ "WaveGetLaneCount", "S", "U", "-", "-", EShLangPSCS, false}, { "WaveGetLaneCount", "S", "U", "-", "-", EShLangPSCS, false},
{ "WaveGetLaneIndex", "S", "U", "-", "-", EShLangPSCS, false}, { "WaveGetLaneIndex", "S", "U", "-", "-", EShLangPSCS, false},
{ "WaveIsHelperLane", "S", "B", "-", "-", EShLangPS, false}, { "WaveActiveAnyTrue", "S", "B", "S", "B", EShLangPSCS, false},
{ "WaveAnyTrue", "S", "B", "S", "B", EShLangPSCS, false}, { "WaveActiveAllTrue", "S", "B", "S", "B", EShLangPSCS, false},
{ "WaveAllTrue", "S", "B", "S", "B", EShLangPSCS, false}, { "WaveActiveBallot", "V4", "U", "S", "B", EShLangPSCS, false},
{ "WaveAllEqual", "S", "B", "S", "B", EShLangPSCS, false},
{ "WaveBallot", "S", "M", "S", "B", EShLangPSCS, false},
{ "WaveReadLaneAt", nullptr, nullptr, "SV,S", "DFUI,U", EShLangPSCS, false}, { "WaveReadLaneAt", nullptr, nullptr, "SV,S", "DFUI,U", EShLangPSCS, false},
{ "WaveReadFirstLane", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WaveReadFirstLane", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveAllSum", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WaveActiveAllEqual", "S", "B", "SV", "DFUI", EShLangPSCS, false},
{ "WaveAllProduct", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WaveActiveAllEqualBool", "S", "B", "S", "B", EShLangPSCS, false},
{ "WaveAllMin", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WaveActiveCountBits", "S", "U", "S", "B", EShLangPSCS, false},
{ "WaveAllMax", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveAllBitAnd", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WaveActiveSum", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveAllBitOr", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WaveActiveProduct", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveAllBitXor", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WaveActiveBitAnd", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveActiveBitOr", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveActiveBitXor", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveActiveMin", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveActiveMax", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WavePrefixSum", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WavePrefixSum", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WavePrefixProduct", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false}, { "WavePrefixProduct", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WavePrefixCountBits", "S", "U", "S", "B", EShLangPSCS, false},
{ "QuadReadAcrossX", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "QuadReadAcrossY", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "QuadReadAcrossDiagonal", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "QuadReadLaneAt", nullptr, nullptr, "SV,S", "DFUI,U", EShLangPSCS, false}, { "QuadReadLaneAt", nullptr, nullptr, "SV,S", "DFUI,U", EShLangPSCS, false},
{ "QuadSwapX", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "QuadSwapY", nullptr, nullptr, "SV", "DFUI", EShLangPSCS, false},
{ "WaveGetOrderedIndex", "S", "U", "-", "-", EShLangPSCS, false},
{ "GlobalOrderedCountIncrement", "S", "U", "S", "U", EShLangPSCS, false},
// Methods for subpass input objects // Methods for subpass input objects
{ "SubpassLoad", "V4", nullptr, "[", "FIU", EShLangPS, true }, { "SubpassLoad", "V4", nullptr, "[", "FIU", EShLangPS, true },
...@@ -1273,30 +1276,31 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int /*version*/, EProfile /*profil ...@@ -1273,30 +1276,31 @@ void TBuiltInParseablesHlsl::identifyBuiltIns(int /*version*/, EProfile /*profil
symbolTable.relateToOperator(BUILTIN_PREFIX "RestartStrip", EOpMethodRestartStrip); symbolTable.relateToOperator(BUILTIN_PREFIX "RestartStrip", EOpMethodRestartStrip);
// Wave ops // Wave ops
symbolTable.relateToOperator("WaveOnce", EOpSubgroupElect); symbolTable.relateToOperator("WaveIsFirstLane", EOpSubgroupElect);
symbolTable.relateToOperator("WaveGetLaneCount", EOpWaveGetLaneCount); symbolTable.relateToOperator("WaveGetLaneCount", EOpWaveGetLaneCount);
symbolTable.relateToOperator("WaveGetLaneIndex", EOpWaveGetLaneIndex); symbolTable.relateToOperator("WaveGetLaneIndex", EOpWaveGetLaneIndex);
symbolTable.relateToOperator("WaveIsHelperLane", EOpWaveIsHelperLane); symbolTable.relateToOperator("WaveActiveAnyTrue", EOpSubgroupAny);
symbolTable.relateToOperator("WaveAnyTrue", EOpSubgroupAny); symbolTable.relateToOperator("WaveActiveAllTrue", EOpSubgroupAll);
symbolTable.relateToOperator("WaveAllTrue", EOpSubgroupAll); symbolTable.relateToOperator("WaveActiveBallot", EOpSubgroupBallot);
symbolTable.relateToOperator("WaveAllEqual", EOpSubgroupAllEqual);
symbolTable.relateToOperator("WaveBallot", EOpWaveBallot);
symbolTable.relateToOperator("WaveReadLaneAt", EOpSubgroupShuffle);
symbolTable.relateToOperator("WaveReadFirstLane", EOpSubgroupBroadcastFirst); symbolTable.relateToOperator("WaveReadFirstLane", EOpSubgroupBroadcastFirst);
symbolTable.relateToOperator("WaveAllSum", EOpSubgroupAdd); symbolTable.relateToOperator("WaveReadLaneAt", EOpSubgroupShuffle);
symbolTable.relateToOperator("WaveAllProduct", EOpSubgroupMul); symbolTable.relateToOperator("WaveActiveAllEqual", EOpSubgroupAllEqual);
symbolTable.relateToOperator("WaveAllMin", EOpSubgroupMin); symbolTable.relateToOperator("WaveActiveAllEqualBool", EOpSubgroupAllEqual);
symbolTable.relateToOperator("WaveAllMax", EOpSubgroupMax); symbolTable.relateToOperator("WaveActiveCountBits", EOpWaveActiveCountBits);
symbolTable.relateToOperator("WaveAllBitAnd", EOpSubgroupAnd); symbolTable.relateToOperator("WaveActiveSum", EOpSubgroupAdd);
symbolTable.relateToOperator("WaveAllBitOr", EOpSubgroupOr); symbolTable.relateToOperator("WaveActiveProduct", EOpSubgroupMul);
symbolTable.relateToOperator("WaveAllBitXor", EOpSubgroupXor); symbolTable.relateToOperator("WaveActiveBitAnd", EOpSubgroupAnd);
symbolTable.relateToOperator("WaveActiveBitOr", EOpSubgroupOr);
symbolTable.relateToOperator("WaveActiveBitXor", EOpSubgroupXor);
symbolTable.relateToOperator("WaveActiveMin", EOpSubgroupMin);
symbolTable.relateToOperator("WaveActiveMax", EOpSubgroupMax);
symbolTable.relateToOperator("WavePrefixSum", EOpSubgroupInclusiveAdd); symbolTable.relateToOperator("WavePrefixSum", EOpSubgroupInclusiveAdd);
symbolTable.relateToOperator("WavePrefixProduct", EOpSubgroupInclusiveMul); symbolTable.relateToOperator("WavePrefixProduct", EOpSubgroupInclusiveMul);
symbolTable.relateToOperator("WavePrefixCountBits", EOpWavePrefixCountBits);
symbolTable.relateToOperator("QuadReadAcrossX", EOpSubgroupQuadSwapHorizontal);
symbolTable.relateToOperator("QuadReadAcrossY", EOpSubgroupQuadSwapVertical);
symbolTable.relateToOperator("QuadReadAcrossDiagonal", EOpSubgroupQuadSwapDiagonal);
symbolTable.relateToOperator("QuadReadLaneAt", EOpSubgroupQuadBroadcast); symbolTable.relateToOperator("QuadReadLaneAt", EOpSubgroupQuadBroadcast);
symbolTable.relateToOperator("QuadSwapX", EOpSubgroupQuadSwapHorizontal);
symbolTable.relateToOperator("QuadSwapY", EOpSubgroupQuadSwapVertical);
symbolTable.relateToOperator("WaveGetOrderedIndex", EOpWaveGetOrderedIndex);
symbolTable.relateToOperator("GlobalOrderedCountIncrement", EOpGlobalOrderedCountIncrement);
// Subpass input methods // Subpass input methods
symbolTable.relateToOperator(BUILTIN_PREFIX "SubpassLoad", EOpSubpassLoad); symbolTable.relateToOperator(BUILTIN_PREFIX "SubpassLoad", EOpSubpassLoad);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment