Commit ae06d1fe by Rex Xu

Add int16 support to AMD_trinary_minmax and AMD_shader_ballot

parent 643e57cd
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#extension GL_ARB_gpu_shader_int64: enable #extension GL_ARB_gpu_shader_int64: enable
#extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_half_float: enable
#extension GL_AMD_gpu_shader_int16: enable
#extension GL_AMD_shader_ballot: enable #extension GL_AMD_shader_ballot: enable
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
...@@ -15,17 +16,21 @@ layout(binding = 0) buffer Buffers ...@@ -15,17 +16,21 @@ layout(binding = 0) buffer Buffers
int64_t i64; int64_t i64;
u64vec2 u64v; u64vec2 u64v;
f16vec3 f16v; f16vec3 f16v;
i16vec4 i16v;
uint16_t u16;
}; };
void main() void main()
{ {
i = minInvocationsAMD(i); i = minInvocationsAMD(i);
uv = minInvocationsAMD(uv); uv = minInvocationsAMD(uv);
fv = minInvocationsAMD(fv); fv = minInvocationsAMD(fv);
dv = minInvocationsAMD(dv); dv = minInvocationsAMD(dv);
i64 = minInvocationsAMD(i64); i64 = minInvocationsAMD(i64);
u64v = minInvocationsAMD(u64v); u64v = minInvocationsAMD(u64v);
f16v = minInvocationsAMD(f16v); f16v = minInvocationsAMD(f16v);
i16v = minInvocationsAMD(i16v);
u16 = minInvocationsAMD(u16);
i = maxInvocationsAMD(i); i = maxInvocationsAMD(i);
uv = maxInvocationsAMD(uv); uv = maxInvocationsAMD(uv);
...@@ -34,6 +39,8 @@ void main() ...@@ -34,6 +39,8 @@ void main()
i64 = maxInvocationsAMD(i64); i64 = maxInvocationsAMD(i64);
u64v = maxInvocationsAMD(u64v); u64v = maxInvocationsAMD(u64v);
f16v = maxInvocationsAMD(f16v); f16v = maxInvocationsAMD(f16v);
i16v = maxInvocationsAMD(i16v);
u16 = maxInvocationsAMD(u16);
i = addInvocationsAMD(i); i = addInvocationsAMD(i);
uv = addInvocationsAMD(uv); uv = addInvocationsAMD(uv);
...@@ -42,6 +49,8 @@ void main() ...@@ -42,6 +49,8 @@ void main()
i64 = addInvocationsAMD(i64); i64 = addInvocationsAMD(i64);
u64v = addInvocationsAMD(u64v); u64v = addInvocationsAMD(u64v);
f16v = addInvocationsAMD(f16v); f16v = addInvocationsAMD(f16v);
i16v = addInvocationsAMD(i16v);
u16 = addInvocationsAMD(u16);
i = minInvocationsNonUniformAMD(i); i = minInvocationsNonUniformAMD(i);
uv = minInvocationsNonUniformAMD(uv); uv = minInvocationsNonUniformAMD(uv);
...@@ -50,6 +59,8 @@ void main() ...@@ -50,6 +59,8 @@ void main()
i64 = minInvocationsNonUniformAMD(i64); i64 = minInvocationsNonUniformAMD(i64);
u64v = minInvocationsNonUniformAMD(u64v); u64v = minInvocationsNonUniformAMD(u64v);
f16v = minInvocationsNonUniformAMD(f16v); f16v = minInvocationsNonUniformAMD(f16v);
i16v = minInvocationsNonUniformAMD(i16v);
u16 = minInvocationsNonUniformAMD(u16);
i = maxInvocationsNonUniformAMD(i); i = maxInvocationsNonUniformAMD(i);
uv = maxInvocationsNonUniformAMD(uv); uv = maxInvocationsNonUniformAMD(uv);
...@@ -58,6 +69,8 @@ void main() ...@@ -58,6 +69,8 @@ void main()
i64 = maxInvocationsNonUniformAMD(i64); i64 = maxInvocationsNonUniformAMD(i64);
u64v = maxInvocationsNonUniformAMD(u64v); u64v = maxInvocationsNonUniformAMD(u64v);
f16v = maxInvocationsNonUniformAMD(f16v); f16v = maxInvocationsNonUniformAMD(f16v);
i16v = maxInvocationsNonUniformAMD(i16v);
u16 = maxInvocationsNonUniformAMD(u16);
i = addInvocationsNonUniformAMD(i); i = addInvocationsNonUniformAMD(i);
uv = addInvocationsNonUniformAMD(uv); uv = addInvocationsNonUniformAMD(uv);
...@@ -66,6 +79,8 @@ void main() ...@@ -66,6 +79,8 @@ void main()
i64 = addInvocationsNonUniformAMD(i64); i64 = addInvocationsNonUniformAMD(i64);
u64v = addInvocationsNonUniformAMD(u64v); u64v = addInvocationsNonUniformAMD(u64v);
f16v = addInvocationsNonUniformAMD(f16v); f16v = addInvocationsNonUniformAMD(f16v);
i16v = addInvocationsNonUniformAMD(i16v);
u16 = addInvocationsNonUniformAMD(u16);
i = minInvocationsInclusiveScanAMD(i); i = minInvocationsInclusiveScanAMD(i);
uv = minInvocationsInclusiveScanAMD(uv); uv = minInvocationsInclusiveScanAMD(uv);
...@@ -74,6 +89,8 @@ void main() ...@@ -74,6 +89,8 @@ void main()
i64 = minInvocationsInclusiveScanAMD(i64); i64 = minInvocationsInclusiveScanAMD(i64);
u64v = minInvocationsInclusiveScanAMD(u64v); u64v = minInvocationsInclusiveScanAMD(u64v);
f16v = minInvocationsInclusiveScanAMD(f16v); f16v = minInvocationsInclusiveScanAMD(f16v);
i16v = minInvocationsInclusiveScanAMD(i16v);
u16 = minInvocationsInclusiveScanAMD(u16);
i = maxInvocationsInclusiveScanAMD(i); i = maxInvocationsInclusiveScanAMD(i);
uv = maxInvocationsInclusiveScanAMD(uv); uv = maxInvocationsInclusiveScanAMD(uv);
...@@ -82,6 +99,8 @@ void main() ...@@ -82,6 +99,8 @@ void main()
i64 = maxInvocationsInclusiveScanAMD(i64); i64 = maxInvocationsInclusiveScanAMD(i64);
u64v = maxInvocationsInclusiveScanAMD(u64v); u64v = maxInvocationsInclusiveScanAMD(u64v);
f16v = maxInvocationsInclusiveScanAMD(f16v); f16v = maxInvocationsInclusiveScanAMD(f16v);
i16v = maxInvocationsInclusiveScanAMD(i16v);
u16 = maxInvocationsInclusiveScanAMD(u16);
i = addInvocationsInclusiveScanAMD(i); i = addInvocationsInclusiveScanAMD(i);
uv = addInvocationsInclusiveScanAMD(uv); uv = addInvocationsInclusiveScanAMD(uv);
...@@ -90,6 +109,8 @@ void main() ...@@ -90,6 +109,8 @@ void main()
i64 = addInvocationsInclusiveScanAMD(i64); i64 = addInvocationsInclusiveScanAMD(i64);
u64v = addInvocationsInclusiveScanAMD(u64v); u64v = addInvocationsInclusiveScanAMD(u64v);
f16v = addInvocationsInclusiveScanAMD(f16v); f16v = addInvocationsInclusiveScanAMD(f16v);
i16v = addInvocationsInclusiveScanAMD(i16v);
u16 = addInvocationsInclusiveScanAMD(u16);
i = minInvocationsExclusiveScanAMD(i); i = minInvocationsExclusiveScanAMD(i);
uv = minInvocationsExclusiveScanAMD(uv); uv = minInvocationsExclusiveScanAMD(uv);
...@@ -98,6 +119,8 @@ void main() ...@@ -98,6 +119,8 @@ void main()
i64 = minInvocationsExclusiveScanAMD(i64); i64 = minInvocationsExclusiveScanAMD(i64);
u64v = minInvocationsExclusiveScanAMD(u64v); u64v = minInvocationsExclusiveScanAMD(u64v);
f16v = minInvocationsExclusiveScanAMD(f16v); f16v = minInvocationsExclusiveScanAMD(f16v);
i16v = minInvocationsExclusiveScanAMD(i16v);
u16 = minInvocationsExclusiveScanAMD(u16);
i = maxInvocationsExclusiveScanAMD(i); i = maxInvocationsExclusiveScanAMD(i);
uv = maxInvocationsExclusiveScanAMD(uv); uv = maxInvocationsExclusiveScanAMD(uv);
...@@ -106,6 +129,8 @@ void main() ...@@ -106,6 +129,8 @@ void main()
i64 = maxInvocationsExclusiveScanAMD(i64); i64 = maxInvocationsExclusiveScanAMD(i64);
u64v = maxInvocationsExclusiveScanAMD(u64v); u64v = maxInvocationsExclusiveScanAMD(u64v);
f16v = maxInvocationsExclusiveScanAMD(f16v); f16v = maxInvocationsExclusiveScanAMD(f16v);
i16v = maxInvocationsExclusiveScanAMD(i16v);
u16 = maxInvocationsExclusiveScanAMD(u16);
i = addInvocationsExclusiveScanAMD(i); i = addInvocationsExclusiveScanAMD(i);
uv = addInvocationsExclusiveScanAMD(uv); uv = addInvocationsExclusiveScanAMD(uv);
...@@ -114,6 +139,8 @@ void main() ...@@ -114,6 +139,8 @@ void main()
i64 = addInvocationsExclusiveScanAMD(i64); i64 = addInvocationsExclusiveScanAMD(i64);
u64v = addInvocationsExclusiveScanAMD(u64v); u64v = addInvocationsExclusiveScanAMD(u64v);
f16v = addInvocationsExclusiveScanAMD(f16v); f16v = addInvocationsExclusiveScanAMD(f16v);
i16v = addInvocationsExclusiveScanAMD(i16v);
u16 = addInvocationsExclusiveScanAMD(u16);
i = minInvocationsInclusiveScanNonUniformAMD(i); i = minInvocationsInclusiveScanNonUniformAMD(i);
uv = minInvocationsInclusiveScanNonUniformAMD(uv); uv = minInvocationsInclusiveScanNonUniformAMD(uv);
...@@ -122,6 +149,8 @@ void main() ...@@ -122,6 +149,8 @@ void main()
i64 = minInvocationsInclusiveScanNonUniformAMD(i64); i64 = minInvocationsInclusiveScanNonUniformAMD(i64);
u64v = minInvocationsInclusiveScanNonUniformAMD(u64v); u64v = minInvocationsInclusiveScanNonUniformAMD(u64v);
f16v = minInvocationsInclusiveScanNonUniformAMD(f16v); f16v = minInvocationsInclusiveScanNonUniformAMD(f16v);
i16v = minInvocationsInclusiveScanNonUniformAMD(i16v);
u16 = minInvocationsInclusiveScanNonUniformAMD(u16);
i = maxInvocationsInclusiveScanNonUniformAMD(i); i = maxInvocationsInclusiveScanNonUniformAMD(i);
uv = maxInvocationsInclusiveScanNonUniformAMD(uv); uv = maxInvocationsInclusiveScanNonUniformAMD(uv);
...@@ -130,6 +159,8 @@ void main() ...@@ -130,6 +159,8 @@ void main()
i64 = maxInvocationsInclusiveScanNonUniformAMD(i64); i64 = maxInvocationsInclusiveScanNonUniformAMD(i64);
u64v = maxInvocationsInclusiveScanNonUniformAMD(u64v); u64v = maxInvocationsInclusiveScanNonUniformAMD(u64v);
f16v = maxInvocationsInclusiveScanNonUniformAMD(f16v); f16v = maxInvocationsInclusiveScanNonUniformAMD(f16v);
i16v = maxInvocationsInclusiveScanNonUniformAMD(i16v);
u16 = maxInvocationsInclusiveScanNonUniformAMD(u16);
i = addInvocationsInclusiveScanNonUniformAMD(i); i = addInvocationsInclusiveScanNonUniformAMD(i);
uv = addInvocationsInclusiveScanNonUniformAMD(uv); uv = addInvocationsInclusiveScanNonUniformAMD(uv);
...@@ -138,6 +169,8 @@ void main() ...@@ -138,6 +169,8 @@ void main()
i64 = addInvocationsInclusiveScanNonUniformAMD(i64); i64 = addInvocationsInclusiveScanNonUniformAMD(i64);
u64v = addInvocationsInclusiveScanNonUniformAMD(u64v); u64v = addInvocationsInclusiveScanNonUniformAMD(u64v);
f16v = addInvocationsInclusiveScanNonUniformAMD(f16v); f16v = addInvocationsInclusiveScanNonUniformAMD(f16v);
i16v = addInvocationsInclusiveScanNonUniformAMD(i16v);
u16 = addInvocationsInclusiveScanNonUniformAMD(u16);
i = minInvocationsExclusiveScanNonUniformAMD(i); i = minInvocationsExclusiveScanNonUniformAMD(i);
uv = minInvocationsExclusiveScanNonUniformAMD(uv); uv = minInvocationsExclusiveScanNonUniformAMD(uv);
...@@ -146,6 +179,8 @@ void main() ...@@ -146,6 +179,8 @@ void main()
i64 = minInvocationsExclusiveScanNonUniformAMD(i64); i64 = minInvocationsExclusiveScanNonUniformAMD(i64);
u64v = minInvocationsExclusiveScanNonUniformAMD(u64v); u64v = minInvocationsExclusiveScanNonUniformAMD(u64v);
f16v = minInvocationsExclusiveScanNonUniformAMD(f16v); f16v = minInvocationsExclusiveScanNonUniformAMD(f16v);
i16v = minInvocationsExclusiveScanNonUniformAMD(i16v);
u16 = minInvocationsExclusiveScanNonUniformAMD(u16);
i = maxInvocationsExclusiveScanNonUniformAMD(i); i = maxInvocationsExclusiveScanNonUniformAMD(i);
uv = maxInvocationsExclusiveScanNonUniformAMD(uv); uv = maxInvocationsExclusiveScanNonUniformAMD(uv);
...@@ -154,6 +189,8 @@ void main() ...@@ -154,6 +189,8 @@ void main()
i64 = maxInvocationsExclusiveScanNonUniformAMD(i64); i64 = maxInvocationsExclusiveScanNonUniformAMD(i64);
u64v = maxInvocationsExclusiveScanNonUniformAMD(u64v); u64v = maxInvocationsExclusiveScanNonUniformAMD(u64v);
f16v = maxInvocationsExclusiveScanNonUniformAMD(f16v); f16v = maxInvocationsExclusiveScanNonUniformAMD(f16v);
i16v = maxInvocationsExclusiveScanNonUniformAMD(i16v);
u16 = maxInvocationsExclusiveScanNonUniformAMD(u16);
i = addInvocationsExclusiveScanNonUniformAMD(i); i = addInvocationsExclusiveScanNonUniformAMD(i);
uv = addInvocationsExclusiveScanNonUniformAMD(uv); uv = addInvocationsExclusiveScanNonUniformAMD(uv);
...@@ -162,4 +199,6 @@ void main() ...@@ -162,4 +199,6 @@ void main()
i64 = addInvocationsExclusiveScanNonUniformAMD(i64); i64 = addInvocationsExclusiveScanNonUniformAMD(i64);
u64v = addInvocationsExclusiveScanNonUniformAMD(u64v); u64v = addInvocationsExclusiveScanNonUniformAMD(u64v);
f16v = addInvocationsExclusiveScanNonUniformAMD(f16v); f16v = addInvocationsExclusiveScanNonUniformAMD(f16v);
i16v = addInvocationsExclusiveScanNonUniformAMD(i16v);
u16 = addInvocationsExclusiveScanNonUniformAMD(u16);
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment