Commit a6a7842f by Jiawei Shao Committed by Commit Bot

ES31: Support atomic functions on D3D11 - Part I

This patch is the first one of the implementation of atomic functions in D3D11. There are mainly two differences in the usage of GLSL and HLSL atomic functions: 1. All GLSL atomic functions have return values, which all represent the original value of the shared or ssbo variable; while all HLSL atomic functions don't, and the original value can be stored in the last parameter of the function call. 2. For HLSL atomic functions, the last parameter that stores the original value is optional except for InterlockedExchange and InterlockedCompareExchange. Missing original_value in the call of InterlockedExchange and InterlockedCompareExchange results in a compile error from HLSL compiler. To handle these differences, we plan to implement the translation in two steps: 1. Support direct translations from GLSL atomic functions to HLSL ones. Direct translation can only handle the following two situations: (1) The sentence is a GLSL atomic function call without requesting a return value and it is not atomicExchange or atomicCompSwap: e.g. GLSL: atomicAdd(mem, value); -> HLSL: InterlockedAdd(mem, value); (2) The sentence is a simple assignment expression: its right is a GLSL atomic function call and its left is a declared variable. e.g. GLSL: oldValue = atomicAdd(mem, value); -> HLSL: InterlockedAdd(mem, value, oldValue); 2. Support atomic functions in the situations that don't support direct translations. We will modify the intermediate tree to make direct translation work on all these situations. e.g. atomicExchange(mem, value); -> int oldValue; oldValue = atomicExchange(mem, value); int oldValue = atomicAdd(mem, value); -> int oldValue; oldValue = atomicAdd(mem, value); return atomicAdd(mem, value); -> int temp; temp = atomicAdd(mem, value); return temp; for (i = 0; i < atomicAdd(mem, value); ++i) -> int temp; temp = atomicAdd(mem, value); for (i = 0; i < temp; ++i) { ... temp = atomicAdd(mem, value); } int result = isTrue ? 
atomicAdd(mem, value) : 0; -> int result; if (isTrue) { result = atomicAdd(mem, value); } else { result = 0; } This patch completes Step 1, which mainly focuses on the translation from GLSL atomic functions to HLSL ones. BUG=angleproject:2682 TEST=angle_end2end_tests Change-Id: I3b655b6e286dad4fd97f255f7fe87521c94db30c Reviewed-on: https://chromium-review.googlesource.com/1121835 Commit-Queue: Jiawei Shao <jiawei.shao@intel.com> Reviewed-by: Olli Etuaho <oetuaho@nvidia.com>
parent a2f043d8
......@@ -345,6 +345,23 @@ const char *GetOperatorString(TOperator op)
case EOpGroupMemoryBarrier:
return "groupMemoryBarrier";
case EOpAtomicAdd:
return "atomicAdd";
case EOpAtomicMin:
return "atomicMin";
case EOpAtomicMax:
return "atomicMax";
case EOpAtomicAnd:
return "atomicAnd";
case EOpAtomicOr:
return "atomicOr";
case EOpAtomicXor:
return "atomicXor";
case EOpAtomicExchange:
return "atomicExchange";
case EOpAtomicCompSwap:
return "atomicCompSwap";
case EOpEmitVertex:
return "EmitVertex";
case EOpEndPrimitive:
......@@ -383,3 +400,21 @@ bool IsAssignment(TOperator op)
return false;
}
}
// Returns true when the operator is one of the GLSL built-in atomic memory
// functions (atomicAdd, atomicMin, atomicMax, atomicAnd, atomicOr, atomicXor,
// atomicExchange, atomicCompSwap).
bool IsAtomicFunction(TOperator op)
{
    return op == EOpAtomicAdd || op == EOpAtomicMin || op == EOpAtomicMax ||
           op == EOpAtomicAnd || op == EOpAtomicOr || op == EOpAtomicXor ||
           op == EOpAtomicExchange || op == EOpAtomicCompSwap;
}
......@@ -241,6 +241,16 @@ enum TOperator
EOpMemoryBarrierShared,
EOpGroupMemoryBarrier,
// Atomic functions
EOpAtomicAdd,
EOpAtomicMin,
EOpAtomicMax,
EOpAtomicAnd,
EOpAtomicOr,
EOpAtomicXor,
EOpAtomicExchange,
EOpAtomicCompSwap,
// Geometry only
EOpEmitVertex,
EOpEndPrimitive
......@@ -252,4 +262,7 @@ const char *GetOperatorString(TOperator op);
// Say whether or not a binary or unary operation changes the value of a variable.
bool IsAssignment(TOperator op);
// Say whether or not an operator represents an atomic function.
bool IsAtomicFunction(TOperator op);
#endif // COMPILER_TRANSLATOR_OPERATOR_H_
......@@ -975,6 +975,14 @@ bool TOutputGLSLBase::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpMemoryBarrierImage:
case EOpMemoryBarrierShared:
case EOpGroupMemoryBarrier:
case EOpAtomicAdd:
case EOpAtomicMin:
case EOpAtomicMax:
case EOpAtomicAnd:
case EOpAtomicOr:
case EOpAtomicXor:
case EOpAtomicExchange:
case EOpAtomicCompSwap:
case EOpEmitVertex:
case EOpEndPrimitive:
writeBuiltInFunctionTriplet(visit, node->getOp(), node->getUseEmulatedFunction());
......
......@@ -80,6 +80,38 @@ bool IsInStd140InterfaceBlock(TIntermTyped *node)
return false;
}
// Maps a GLSL atomic operator to the name of the corresponding HLSL
// Interlocked* intrinsic, including the opening parenthesis of the call so the
// caller can stream the arguments directly after it.
const char *GetHLSLAtomicFunctionStringAndLeftParenthesis(TOperator op)
{
    const char *hlslFunction = "";
    switch (op)
    {
        case EOpAtomicAdd:
            hlslFunction = "InterlockedAdd(";
            break;
        case EOpAtomicMin:
            hlslFunction = "InterlockedMin(";
            break;
        case EOpAtomicMax:
            hlslFunction = "InterlockedMax(";
            break;
        case EOpAtomicAnd:
            hlslFunction = "InterlockedAnd(";
            break;
        case EOpAtomicOr:
            hlslFunction = "InterlockedOr(";
            break;
        case EOpAtomicXor:
            hlslFunction = "InterlockedXor(";
            break;
        case EOpAtomicExchange:
            hlslFunction = "InterlockedExchange(";
            break;
        case EOpAtomicCompSwap:
            hlslFunction = "InterlockedCompareExchange(";
            break;
        default:
            // Callers only pass operators for which IsAtomicFunction() is true.
            UNREACHABLE();
    }
    return hlslFunction;
}
bool IsAtomicFunctionDirectAssign(const TIntermBinary &node)
{
return node.getOp() == EOpAssign && node.getRight()->getAsAggregate() &&
IsAtomicFunction(node.getRight()->getAsAggregate()->getOp());
}
} // anonymous namespace
TReferencedBlock::TReferencedBlock(const TInterfaceBlock *aBlock,
......@@ -1142,6 +1174,27 @@ bool OutputHLSL::visitBinary(Visit visit, TIntermBinary *node)
// function call is assigned.
ASSERT(rightAgg == nullptr);
}
// Assignment expressions with atomic functions should be transformed into atomic
// function calls in HLSL.
// e.g. original_value = atomicAdd(dest, value) should be translated into
// InterlockedAdd(dest, value, original_value);
else if (IsAtomicFunctionDirectAssign(*node))
{
TIntermAggregate *atomicFunctionNode = node->getRight()->getAsAggregate();
TOperator atomicFunctionOp = atomicFunctionNode->getOp();
out << GetHLSLAtomicFunctionStringAndLeftParenthesis(atomicFunctionOp);
TIntermSequence *argumentSeq = atomicFunctionNode->getSequence();
ASSERT(argumentSeq->size() >= 2u);
for (auto &argument : *argumentSeq)
{
argument->traverse(this);
out << ", ";
}
node->getLeft()->traverse(this);
out << ")";
return false;
}
outputAssign(visit, node->getType(), out);
break;
case EOpInitialize:
......@@ -2139,6 +2192,29 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpMemoryBarrier:
outputTriplet(out, visit, "AllMemoryBarrier(", "", ")");
break;
// Single atomic function calls without return value.
// e.g. atomicAdd(dest, value) should be translated into InterlockedAdd(dest, value).
case EOpAtomicAdd:
case EOpAtomicMin:
case EOpAtomicMax:
case EOpAtomicAnd:
case EOpAtomicOr:
case EOpAtomicXor:
outputTriplet(out, visit, GetHLSLAtomicFunctionStringAndLeftParenthesis(node->getOp()),
",", ")");
break;
// The parameter 'original_value' of InterlockedExchange(dest, value, original_value) and
// InterlockedCompareExchange(dest, compare_value, value, original_value) is not optional.
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/interlockedexchange
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/interlockedcompareexchange
// So all the call of atomicExchange(dest, value) and atomicCompSwap(dest, compare_value,
// value) should all be modified into the form of "int temp; temp = atomicExchange(dest,
// value);" and "int temp; temp = atomicCompSwap(dest, compare_value, value);" in the
// intermediate tree before traversing outputHLSL.
case EOpAtomicExchange:
case EOpAtomicCompSwap:
default:
UNREACHABLE();
}
......
......@@ -5620,10 +5620,10 @@ void TParseContext::checkTextureOffsetConst(TIntermAggregate *functionCall)
void TParseContext::checkAtomicMemoryBuiltinFunctions(TIntermAggregate *functionCall)
{
ASSERT(functionCall->getOp() == EOpCallBuiltInFunction);
const TFunction *func = functionCall->getFunction();
if (BuiltInGroup::isAtomicMemory(func))
{
ASSERT(IsAtomicFunction(functionCall->getOp()));
TIntermSequence *arguments = functionCall->getSequence();
TIntermTyped *memNode = (*arguments)[0]->getAsTyped();
......@@ -5845,10 +5845,13 @@ TIntermTyped *TParseContext::addNonConstructorFunctionCall(TFunctionLookup *fnCa
ASSERT(callNode != nullptr);
return callNode;
}
TIntermAggregate *callNode =
TIntermAggregate::CreateBuiltInFunctionCall(*fnCandidate, &fnCall->arguments());
callNode->setLine(loc);
checkAtomicMemoryBuiltinFunctions(callNode);
// Some built-in functions have out parameters too.
functionCallRValueLValueErrorCheck(fnCandidate, callNode);
......@@ -5864,7 +5867,6 @@ TIntermTyped *TParseContext::addNonConstructorFunctionCall(TFunctionLookup *fnCa
checkTextureOffsetConst(callNode);
checkTextureGather(callNode);
checkImageMemoryAccessForBuiltinFunctions(callNode);
checkAtomicMemoryBuiltinFunctions(callNode);
functionCallRValueLValueErrorCheck(fnCandidate, callNode);
return callNode;
}
......
......@@ -9793,7 +9793,7 @@ constexpr const TFunction kFunction_atomicAdd_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicAdd,
false);
constexpr const TFunction kFunction_atomicAdd_0C0C(
BuiltInId::atomicAdd_Int1_Int1,
......@@ -9802,7 +9802,7 @@ constexpr const TFunction kFunction_atomicAdd_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicAdd,
false);
constexpr const TFunction kFunction_atomicMin_0D0D(
BuiltInId::atomicMin_UInt1_UInt1,
......@@ -9811,7 +9811,7 @@ constexpr const TFunction kFunction_atomicMin_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicMin,
false);
constexpr const TFunction kFunction_atomicMin_0C0C(
BuiltInId::atomicMin_Int1_Int1,
......@@ -9820,7 +9820,7 @@ constexpr const TFunction kFunction_atomicMin_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicMin,
false);
constexpr const TFunction kFunction_atomicMax_0D0D(
BuiltInId::atomicMax_UInt1_UInt1,
......@@ -9829,7 +9829,7 @@ constexpr const TFunction kFunction_atomicMax_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicMax,
false);
constexpr const TFunction kFunction_atomicMax_0C0C(
BuiltInId::atomicMax_Int1_Int1,
......@@ -9838,7 +9838,7 @@ constexpr const TFunction kFunction_atomicMax_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicMax,
false);
constexpr const TFunction kFunction_atomicAnd_0D0D(
BuiltInId::atomicAnd_UInt1_UInt1,
......@@ -9847,7 +9847,7 @@ constexpr const TFunction kFunction_atomicAnd_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicAnd,
false);
constexpr const TFunction kFunction_atomicAnd_0C0C(
BuiltInId::atomicAnd_Int1_Int1,
......@@ -9856,7 +9856,7 @@ constexpr const TFunction kFunction_atomicAnd_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicAnd,
false);
constexpr const TFunction kFunction_atomicOr_0D0D(
BuiltInId::atomicOr_UInt1_UInt1,
......@@ -9865,7 +9865,7 @@ constexpr const TFunction kFunction_atomicOr_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicOr,
false);
constexpr const TFunction kFunction_atomicOr_0C0C(
BuiltInId::atomicOr_Int1_Int1,
......@@ -9874,7 +9874,7 @@ constexpr const TFunction kFunction_atomicOr_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicOr,
false);
constexpr const TFunction kFunction_atomicXor_0D0D(
BuiltInId::atomicXor_UInt1_UInt1,
......@@ -9883,7 +9883,7 @@ constexpr const TFunction kFunction_atomicXor_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicXor,
false);
constexpr const TFunction kFunction_atomicXor_0C0C(
BuiltInId::atomicXor_Int1_Int1,
......@@ -9892,7 +9892,7 @@ constexpr const TFunction kFunction_atomicXor_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicXor,
false);
constexpr const TFunction kFunction_atomicExchange_0D0D(
BuiltInId::atomicExchange_UInt1_UInt1,
......@@ -9901,7 +9901,7 @@ constexpr const TFunction kFunction_atomicExchange_0D0D(
BuiltInParameters::p_io_0D0D0D,
2,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicExchange,
false);
constexpr const TFunction kFunction_atomicExchange_0C0C(
BuiltInId::atomicExchange_Int1_Int1,
......@@ -9910,7 +9910,7 @@ constexpr const TFunction kFunction_atomicExchange_0C0C(
BuiltInParameters::p_io_0C0C0C,
2,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicExchange,
false);
constexpr const TFunction kFunction_atomicCompSwap_0D0D0D(
BuiltInId::atomicCompSwap_UInt1_UInt1_UInt1,
......@@ -9919,7 +9919,7 @@ constexpr const TFunction kFunction_atomicCompSwap_0D0D0D(
BuiltInParameters::p_io_0D0D0D,
3,
StaticType::Get<EbtUInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicCompSwap,
false);
constexpr const TFunction kFunction_atomicCompSwap_0C0C0C(
BuiltInId::atomicCompSwap_Int1_Int1_Int1,
......@@ -9928,7 +9928,7 @@ constexpr const TFunction kFunction_atomicCompSwap_0C0C0C(
BuiltInParameters::p_io_0C0C0C,
3,
StaticType::Get<EbtInt, EbpUndefined, EvqGlobal, 1, 1>(),
EOpCallBuiltInFunction,
EOpAtomicCompSwap,
false);
constexpr const TFunction kFunction_imageSize_0c(
BuiltInId::imageSize_Image2D1,
......
......@@ -508,7 +508,7 @@ GROUP BEGIN AtomicCounter
GROUP END AtomicCounter
GROUP BEGIN AtomicMemory {"queryFunction": true}
DEFAULT METADATA {"level": "ESSL3_1_BUILTINS", "op": "CallBuiltInFunction"}
DEFAULT METADATA {"level": "ESSL3_1_BUILTINS", "op": "auto"}
uint atomicAdd(inout uint, uint);
int atomicAdd(inout int, int);
uint atomicMin(inout uint, uint);
......
71be250213ce3f9d36417ff880fb4100
\ No newline at end of file
fe7f387068b72dc71759198547902721
\ No newline at end of file
......@@ -20,34 +20,35 @@ class ComputeShaderTest : public ANGLETest
protected:
ComputeShaderTest() {}
template <GLint kWidth, GLint kHeight>
template <class T, GLint kWidth, GLint kHeight>
void runSharedMemoryTest(const char *csSource,
const std::array<GLuint, kWidth * kHeight> &inputData,
const std::array<GLuint, kWidth * kHeight> &expectedValues)
GLenum internalFormat,
GLenum format,
const std::array<T, kWidth * kHeight> &inputData,
const std::array<T, kWidth * kHeight> &expectedValues)
{
GLTexture texture[2];
GLFramebuffer framebuffer;
glBindTexture(GL_TEXTURE_2D, texture[0]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
glTexStorage2D(GL_TEXTURE_2D, 1, internalFormat, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, format,
inputData.data());
EXPECT_GL_NO_ERROR();
constexpr GLuint initData[kWidth * kHeight] = {};
constexpr T initData[kWidth * kHeight] = {};
glBindTexture(GL_TEXTURE_2D, texture[1]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT,
initData);
glTexStorage2D(GL_TEXTURE_2D, 1, internalFormat, kWidth, kHeight);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, format, initData);
EXPECT_GL_NO_ERROR();
ANGLE_GL_COMPUTE_PROGRAM(program, csSource);
glUseProgram(program.get());
glBindImageTexture(0, texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
glBindImageTexture(0, texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, internalFormat);
EXPECT_GL_NO_ERROR();
glBindImageTexture(1, texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
glBindImageTexture(1, texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, internalFormat);
EXPECT_GL_NO_ERROR();
glDispatchCompute(1, 1, 1);
......@@ -55,14 +56,14 @@ class ComputeShaderTest : public ANGLETest
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
GLuint outputValues[kWidth * kHeight];
T outputValues[kWidth * kHeight] = {};
glUseProgram(0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture[1],
0);
EXPECT_GL_NO_ERROR();
glReadPixels(0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT, outputValues);
glReadPixels(0, 0, kWidth, kHeight, GL_RED_INTEGER, format, outputValues);
EXPECT_GL_NO_ERROR();
for (int i = 0; i < kWidth * kHeight; i++)
......@@ -1386,7 +1387,8 @@ TEST_P(ComputeShaderTest, NonArraySharedVariable)
const std::array<GLuint, 4> inputData = {{250, 200, 150, 100}};
const std::array<GLuint, 4> expectedValues = {{250, 200, 150, 250}};
runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
expectedValues);
}
// Verify shared non-struct array variables can work correctly.
......@@ -1410,7 +1412,8 @@ TEST_P(ComputeShaderTest, NonStructArrayAsSharedVariable)
const std::array<GLuint, 4> inputData = {{250, 200, 150, 100}};
const std::array<GLuint, 4> expectedValues = {{250, 150, 200, 100}};
runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
expectedValues);
}
// Verify shared struct array variables work correctly.
......@@ -1438,7 +1441,111 @@ TEST_P(ComputeShaderTest, StructArrayAsSharedVariable)
const std::array<GLuint, 4> inputData = {{250, 200, 150, 100}};
const std::array<GLuint, 4> expectedValues = {{250, 150, 200, 100}};
runSharedMemoryTest<2, 2>(kCSShader, inputData, expectedValues);
runSharedMemoryTest<GLuint, 2, 2>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
expectedValues);
}
// Verify using atomic functions without return value can work correctly.
// TODO(jiawei.shao@intel.com): add test on atomicExchange and atomicCompSwap.
TEST_P(ComputeShaderTest, AtomicFunctionsNoReturnValue)
{
// TODO(jiawei.shao@intel.com): find out why this shader causes a link error on Android Nexus 5
// bot.
ANGLE_SKIP_TEST_IF(IsAndroid());
// Six invocations each load one value from srcImage and apply every atomic
// function (without using its return value) to a dedicated slot of the shared
// array "results". Slots for atomicMin/atomicAnd start at 0xFFFF so those ops
// can actually lower the value; all other slots start at 0.
const char kCSShader[] =
R"(#version 310 es
layout (local_size_x = 6, local_size_y = 1, local_size_z = 1) in;
layout (r32ui, binding = 0) readonly uniform highp uimage2D srcImage;
layout (r32ui, binding = 1) writeonly uniform highp uimage2D dstImage;
const uint kSumIndex = 0u;
const uint kMinIndex = 1u;
const uint kMaxIndex = 2u;
const uint kOrIndex = 3u;
const uint kAndIndex = 4u;
const uint kXorIndex = 5u;
shared highp uint results[6];
void main()
{
if (gl_LocalInvocationID.x == kMinIndex || gl_LocalInvocationID.x == kAndIndex)
{
results[gl_LocalInvocationID.x] = 0xFFFFu;
}
else
{
results[gl_LocalInvocationID.x] = 0u;
}
memoryBarrierShared();
barrier();
uint value = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
atomicAdd(results[kSumIndex], value);
atomicMin(results[kMinIndex], value);
atomicMax(results[kMaxIndex], value);
atomicOr(results[kOrIndex], value);
atomicAnd(results[kAndIndex], value);
atomicXor(results[kXorIndex], value);
memoryBarrierShared();
barrier();
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
uvec4(results[gl_LocalInvocationID.x]));
})";
// Inputs are distinct powers of two, so each combined result is predictable:
// sum/or/xor of disjoint bits = 63, min = 1, max = 32, and(0xFFFF, each) = 0.
const std::array<GLuint, 6> inputData = {{1, 2, 4, 8, 16, 32}};
const std::array<GLuint, 6> expectedValues = {{63, 1, 32, 63, 0, 63}};
runSharedMemoryTest<GLuint, 6, 1>(kCSShader, GL_R32UI, GL_UNSIGNED_INT, inputData,
expectedValues);
}
// Verify using atomic functions in a non-initializer single assignment can work correctly.
TEST_P(ComputeShaderTest, AtomicFunctionsInNonInitializerSingleAssignment)
{
// Invocation 0 runs a fixed sequence of atomic functions on one shared
// variable and stores each returned value (the value *before* the atomic
// operation, per GLSL ES 3.10) into outputData; the other invocations only
// supply inputs. This exercises the "lhs = atomicXxx(...)" direct-assignment
// translation path.
const char kCSShader[] =
R"(#version 310 es
layout (local_size_x = 9, local_size_y = 1, local_size_z = 1) in;
layout (r32i, binding = 0) readonly uniform highp iimage2D srcImage;
layout (r32i, binding = 1) writeonly uniform highp iimage2D dstImage;
shared highp int sharedVariable;
shared highp int inputData[9];
shared highp int outputData[9];
void main()
{
int inputValue = imageLoad(srcImage, ivec2(gl_LocalInvocationID.xy)).x;
inputData[gl_LocalInvocationID.x] = inputValue;
memoryBarrierShared();
barrier();
if (gl_LocalInvocationID.x == 0u)
{
sharedVariable = 0;
outputData[0] = atomicAdd(sharedVariable, inputData[0]);
outputData[1] = atomicMin(sharedVariable, inputData[1]);
outputData[2] = atomicMax(sharedVariable, inputData[2]);
outputData[3] = atomicAnd(sharedVariable, inputData[3]);
outputData[4] = atomicOr(sharedVariable, inputData[4]);
outputData[5] = atomicXor(sharedVariable, inputData[5]);
outputData[6] = atomicExchange(sharedVariable, inputData[6]);
outputData[7] = atomicCompSwap(sharedVariable, 64, inputData[7]);
outputData[8] = atomicAdd(sharedVariable, inputData[8]);
}
memoryBarrierShared();
barrier();
imageStore(dstImage, ivec2(gl_LocalInvocationID.xy),
ivec4(outputData[gl_LocalInvocationID.x]));
})";
// Expected values are the pre-operation values of sharedVariable after each
// step of the sequence above, computed by hand from inputData.
const std::array<GLint, 9> inputData = {{1, 2, 4, 8, 16, 32, 64, 128, 1}};
const std::array<GLint, 9> expectedValues = {{0, 1, 1, 4, 0, 16, 48, 64, 128}};
runSharedMemoryTest<GLint, 9, 1>(kCSShader, GL_R32I, GL_INT, inputData, expectedValues);
}
// Check that it is not possible to create a compute shader when the context does not support ES
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment