Commit 430ef40a by Rex Xu

Implement new revision of extension GL_AMD_shader_ballot

- Add support for invocation functions with "InclusiveScan" and "ExclusiveScan" modes. - Add support for invocation functions taking int64/uint64/doube/float16 as inout data types.
parent 33dadd12
...@@ -2354,7 +2354,7 @@ void Builder::dump(std::vector<unsigned int>& out) const ...@@ -2354,7 +2354,7 @@ void Builder::dump(std::vector<unsigned int>& out) const
for (auto it = extensions.cbegin(); it != extensions.cend(); ++it) { for (auto it = extensions.cbegin(); it != extensions.cend(); ++it) {
Instruction extInst(0, 0, OpExtension); Instruction extInst(0, 0, OpExtension);
extInst.addStringOperand(*it); extInst.addStringOperand(it->c_str());
extInst.dump(out); extInst.dump(out);
} }
......
...@@ -555,7 +555,7 @@ public: ...@@ -555,7 +555,7 @@ public:
SourceLanguage source; SourceLanguage source;
int sourceVersion; int sourceVersion;
std::set<const char*> extensions; std::set<std::string> extensions;
std::vector<const char*> sourceExtensions; std::vector<const char*> sourceExtensions;
AddressingModel addressModel; AddressingModel addressModel;
MemoryModel memoryModel; MemoryModel memoryModel;
......
#version 450
#extension GL_ARB_gpu_shader_int64: enable
#extension GL_AMD_gpu_shader_half_float: enable
#extension GL_AMD_shader_ballot: enable
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(binding = 0) buffer Buffers
{
int i;
uvec2 uv;
vec3 fv;
dvec4 dv;
int64_t i64;
u64vec2 u64v;
f16vec3 f16v;
};
void main()
{
i = minInvocationsAMD(i);
uv = minInvocationsAMD(uv);
fv = minInvocationsAMD(fv);
dv = minInvocationsAMD(dv);
i64 = minInvocationsAMD(i64);
u64v = minInvocationsAMD(u64v);
f16v = minInvocationsAMD(f16v);
i = maxInvocationsAMD(i);
uv = maxInvocationsAMD(uv);
fv = maxInvocationsAMD(fv);
dv = maxInvocationsAMD(dv);
i64 = maxInvocationsAMD(i64);
u64v = maxInvocationsAMD(u64v);
f16v = maxInvocationsAMD(f16v);
i = addInvocationsAMD(i);
uv = addInvocationsAMD(uv);
fv = addInvocationsAMD(fv);
dv = addInvocationsAMD(dv);
i64 = addInvocationsAMD(i64);
u64v = addInvocationsAMD(u64v);
f16v = addInvocationsAMD(f16v);
i = minInvocationsNonUniformAMD(i);
uv = minInvocationsNonUniformAMD(uv);
fv = minInvocationsNonUniformAMD(fv);
dv = minInvocationsNonUniformAMD(dv);
i64 = minInvocationsNonUniformAMD(i64);
u64v = minInvocationsNonUniformAMD(u64v);
f16v = minInvocationsNonUniformAMD(f16v);
i = maxInvocationsNonUniformAMD(i);
uv = maxInvocationsNonUniformAMD(uv);
fv = maxInvocationsNonUniformAMD(fv);
dv = maxInvocationsNonUniformAMD(dv);
i64 = maxInvocationsNonUniformAMD(i64);
u64v = maxInvocationsNonUniformAMD(u64v);
f16v = maxInvocationsNonUniformAMD(f16v);
i = addInvocationsNonUniformAMD(i);
uv = addInvocationsNonUniformAMD(uv);
fv = addInvocationsNonUniformAMD(fv);
dv = addInvocationsNonUniformAMD(dv);
i64 = addInvocationsNonUniformAMD(i64);
u64v = addInvocationsNonUniformAMD(u64v);
f16v = addInvocationsNonUniformAMD(f16v);
i = minInvocationsInclusiveScanAMD(i);
uv = minInvocationsInclusiveScanAMD(uv);
fv = minInvocationsInclusiveScanAMD(fv);
dv = minInvocationsInclusiveScanAMD(dv);
i64 = minInvocationsInclusiveScanAMD(i64);
u64v = minInvocationsInclusiveScanAMD(u64v);
f16v = minInvocationsInclusiveScanAMD(f16v);
i = maxInvocationsInclusiveScanAMD(i);
uv = maxInvocationsInclusiveScanAMD(uv);
fv = maxInvocationsInclusiveScanAMD(fv);
dv = maxInvocationsInclusiveScanAMD(dv);
i64 = maxInvocationsInclusiveScanAMD(i64);
u64v = maxInvocationsInclusiveScanAMD(u64v);
f16v = maxInvocationsInclusiveScanAMD(f16v);
i = addInvocationsInclusiveScanAMD(i);
uv = addInvocationsInclusiveScanAMD(uv);
fv = addInvocationsInclusiveScanAMD(fv);
dv = addInvocationsInclusiveScanAMD(dv);
i64 = addInvocationsInclusiveScanAMD(i64);
u64v = addInvocationsInclusiveScanAMD(u64v);
f16v = addInvocationsInclusiveScanAMD(f16v);
i = minInvocationsExclusiveScanAMD(i);
uv = minInvocationsExclusiveScanAMD(uv);
fv = minInvocationsExclusiveScanAMD(fv);
dv = minInvocationsExclusiveScanAMD(dv);
i64 = minInvocationsExclusiveScanAMD(i64);
u64v = minInvocationsExclusiveScanAMD(u64v);
f16v = minInvocationsExclusiveScanAMD(f16v);
i = maxInvocationsExclusiveScanAMD(i);
uv = maxInvocationsExclusiveScanAMD(uv);
fv = maxInvocationsExclusiveScanAMD(fv);
dv = maxInvocationsExclusiveScanAMD(dv);
i64 = maxInvocationsExclusiveScanAMD(i64);
u64v = maxInvocationsExclusiveScanAMD(u64v);
f16v = maxInvocationsExclusiveScanAMD(f16v);
i = addInvocationsExclusiveScanAMD(i);
uv = addInvocationsExclusiveScanAMD(uv);
fv = addInvocationsExclusiveScanAMD(fv);
dv = addInvocationsExclusiveScanAMD(dv);
i64 = addInvocationsExclusiveScanAMD(i64);
u64v = addInvocationsExclusiveScanAMD(u64v);
f16v = addInvocationsExclusiveScanAMD(f16v);
i = minInvocationsInclusiveScanNonUniformAMD(i);
uv = minInvocationsInclusiveScanNonUniformAMD(uv);
fv = minInvocationsInclusiveScanNonUniformAMD(fv);
dv = minInvocationsInclusiveScanNonUniformAMD(dv);
i64 = minInvocationsInclusiveScanNonUniformAMD(i64);
u64v = minInvocationsInclusiveScanNonUniformAMD(u64v);
f16v = minInvocationsInclusiveScanNonUniformAMD(f16v);
i = maxInvocationsInclusiveScanNonUniformAMD(i);
uv = maxInvocationsInclusiveScanNonUniformAMD(uv);
fv = maxInvocationsInclusiveScanNonUniformAMD(fv);
dv = maxInvocationsInclusiveScanNonUniformAMD(dv);
i64 = maxInvocationsInclusiveScanNonUniformAMD(i64);
u64v = maxInvocationsInclusiveScanNonUniformAMD(u64v);
f16v = maxInvocationsInclusiveScanNonUniformAMD(f16v);
i = addInvocationsInclusiveScanNonUniformAMD(i);
uv = addInvocationsInclusiveScanNonUniformAMD(uv);
fv = addInvocationsInclusiveScanNonUniformAMD(fv);
dv = addInvocationsInclusiveScanNonUniformAMD(dv);
i64 = addInvocationsInclusiveScanNonUniformAMD(i64);
u64v = addInvocationsInclusiveScanNonUniformAMD(u64v);
f16v = addInvocationsInclusiveScanNonUniformAMD(f16v);
i = minInvocationsExclusiveScanNonUniformAMD(i);
uv = minInvocationsExclusiveScanNonUniformAMD(uv);
fv = minInvocationsExclusiveScanNonUniformAMD(fv);
dv = minInvocationsExclusiveScanNonUniformAMD(dv);
i64 = minInvocationsExclusiveScanNonUniformAMD(i64);
u64v = minInvocationsExclusiveScanNonUniformAMD(u64v);
f16v = minInvocationsExclusiveScanNonUniformAMD(f16v);
i = maxInvocationsExclusiveScanNonUniformAMD(i);
uv = maxInvocationsExclusiveScanNonUniformAMD(uv);
fv = maxInvocationsExclusiveScanNonUniformAMD(fv);
dv = maxInvocationsExclusiveScanNonUniformAMD(dv);
i64 = maxInvocationsExclusiveScanNonUniformAMD(i64);
u64v = maxInvocationsExclusiveScanNonUniformAMD(u64v);
f16v = maxInvocationsExclusiveScanNonUniformAMD(f16v);
i = addInvocationsExclusiveScanNonUniformAMD(i);
uv = addInvocationsExclusiveScanNonUniformAMD(uv);
fv = addInvocationsExclusiveScanNonUniformAMD(fv);
dv = addInvocationsExclusiveScanNonUniformAMD(dv);
i64 = addInvocationsExclusiveScanNonUniformAMD(i64);
u64v = addInvocationsExclusiveScanNonUniformAMD(u64v);
f16v = addInvocationsExclusiveScanNonUniformAMD(f16v);
}
...@@ -335,6 +335,18 @@ enum TOperator { ...@@ -335,6 +335,18 @@ enum TOperator {
EOpMinInvocationsNonUniform, EOpMinInvocationsNonUniform,
EOpMaxInvocationsNonUniform, EOpMaxInvocationsNonUniform,
EOpAddInvocationsNonUniform, EOpAddInvocationsNonUniform,
EOpMinInvocationsInclusiveScan,
EOpMaxInvocationsInclusiveScan,
EOpAddInvocationsInclusiveScan,
EOpMinInvocationsInclusiveScanNonUniform,
EOpMaxInvocationsInclusiveScanNonUniform,
EOpAddInvocationsInclusiveScanNonUniform,
EOpMinInvocationsExclusiveScan,
EOpMaxInvocationsExclusiveScan,
EOpAddInvocationsExclusiveScan,
EOpMinInvocationsExclusiveScanNonUniform,
EOpMaxInvocationsExclusiveScanNonUniform,
EOpAddInvocationsExclusiveScanNonUniform,
EOpSwizzleInvocations, EOpSwizzleInvocations,
EOpSwizzleInvocationsMasked, EOpSwizzleInvocationsMasked,
EOpWriteInvocation, EOpWriteInvocation,
......
...@@ -377,6 +377,21 @@ bool TOutputTraverser::visitUnary(TVisit /* visit */, TIntermUnary* node) ...@@ -377,6 +377,21 @@ bool TOutputTraverser::visitUnary(TVisit /* visit */, TIntermUnary* node)
case EOpMinInvocationsNonUniform: out.debug << "minInvocationsNonUniform"; break; case EOpMinInvocationsNonUniform: out.debug << "minInvocationsNonUniform"; break;
case EOpMaxInvocationsNonUniform: out.debug << "maxInvocationsNonUniform"; break; case EOpMaxInvocationsNonUniform: out.debug << "maxInvocationsNonUniform"; break;
case EOpAddInvocationsNonUniform: out.debug << "addInvocationsNonUniform"; break; case EOpAddInvocationsNonUniform: out.debug << "addInvocationsNonUniform"; break;
case EOpMinInvocationsInclusiveScan: out.debug << "minInvocationsInclusiveScan"; break;
case EOpMaxInvocationsInclusiveScan: out.debug << "maxInvocationsInclusiveScan"; break;
case EOpAddInvocationsInclusiveScan: out.debug << "addInvocationsInclusiveScan"; break;
case EOpMinInvocationsInclusiveScanNonUniform: out.debug << "minInvocationsInclusiveScanNonUniform"; break;
case EOpMaxInvocationsInclusiveScanNonUniform: out.debug << "maxInvocationsInclusiveScanNonUniform"; break;
case EOpAddInvocationsInclusiveScanNonUniform: out.debug << "addInvocationsInclusiveScanNonUniform"; break;
case EOpMinInvocationsExclusiveScan: out.debug << "minInvocationsExclusiveScan"; break;
case EOpMaxInvocationsExclusiveScan: out.debug << "maxInvocationsExclusiveScan"; break;
case EOpAddInvocationsExclusiveScan: out.debug << "addInvocationsExclusiveScan"; break;
case EOpMinInvocationsExclusiveScanNonUniform: out.debug << "minInvocationsExclusiveScanNonUniform"; break;
case EOpMaxInvocationsExclusiveScanNonUniform: out.debug << "maxInvocationsExclusiveScanNonUniform"; break;
case EOpAddInvocationsExclusiveScanNonUniform: out.debug << "addInvocationsExclusiveScanNonUniform"; break;
case EOpMbcnt: out.debug << "mbcnt"; break; case EOpMbcnt: out.debug << "mbcnt"; break;
case EOpCubeFaceIndex: out.debug << "cubeFaceIndex"; break; case EOpCubeFaceIndex: out.debug << "cubeFaceIndex"; break;
......
...@@ -358,6 +358,7 @@ INSTANTIATE_TEST_CASE_P( ...@@ -358,6 +358,7 @@ INSTANTIATE_TEST_CASE_P(
Glsl, CompileVulkanToSpirvTestAMD, Glsl, CompileVulkanToSpirvTestAMD,
::testing::ValuesIn(std::vector<std::string>({ ::testing::ValuesIn(std::vector<std::string>({
"spv.float16.frag", "spv.float16.frag",
"spv.shaderBallotAMD.comp"
})), })),
FileNameAsCustomTestSuffix FileNameAsCustomTestSuffix
); );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment