Commit c9ba782a by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Support atomic counter array of arrays

Previously, it was assumed that a function argument is either AC or AC[i], and it was converted to AC or AC+i respectively. The code is changed to support any number of dimensions and subscripts, using array size information from AC's type. If AC is an array of array (atomic_uint AC[N][M][R]), the following index calculations are done. AC -> AC.arrayIndex AC[i] -> AC.arrayIndex + i*M*R AC[i][j] -> AC.arrayIndex + i*M*R + j*R AC[i][j][k] -> AC.arrayIndex + i*M*R + j*R + k A test is added to exercise these various forms of indexing: AtomicCounterBufferTest31.AtomicCounterArrayOfArray Bug: angleproject:3566 Change-Id: I1e181a7363463d1d0ee4916f35006ed7c58e0f7c Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1739488 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: 's avatarTim Van Patten <timvp@google.com>
parent a1f0d234
...@@ -314,51 +314,29 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser ...@@ -314,51 +314,29 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser
new TIntermSymbol(mRetyper.getVariableReplacement(symbolVariable)); new TIntermSymbol(mRetyper.getVariableReplacement(symbolVariable));
ASSERT(bindingOffset != nullptr); ASSERT(bindingOffset != nullptr);
TIntermNode *argument = symbol; TIntermNode *argument = convertFunctionArgument(symbol, &bindingOffset);
TIntermNode *parent = getParentNode(); if (mRetyper.isInAggregate())
ASSERT(parent);
TIntermBinary *arrayExpression = parent->getAsBinaryNode();
if (arrayExpression)
{ {
ASSERT(arrayExpression->getOp() == EOpIndexDirect || mRetyper.replaceFunctionCallArg(argument, bindingOffset);
arrayExpression->getOp() == EOpIndexIndirect); }
else
argument = arrayExpression; {
// If there's a stray ac[i] lying around, just delete it. This can happen if the shader
TIntermTyped *subscript = arrayExpression->getRight(); // uses ac[i].length(), which in RemoveArrayLengthMethod() will result in an ineffective
TIntermConstantUnion *subscriptAsConstant = subscript->getAsConstantUnion(); // statement that's just ac[i]; (similarly for a stray ac;, it doesn't have to be
const bool subscriptIsZero = subscriptAsConstant && subscriptAsConstant->isZero(0); // subscripted). Note that the subscript could have side effects, but the
// convertFunctionArgument above has already generated code that includes the subscript
if (!subscriptIsZero) // (and therefore its side-effect).
TIntermBlock *block = nullptr;
for (size_t ancestorIndex = 0; block == nullptr; ++ancestorIndex)
{ {
// Copy the atomic counter binding/offset constant and modify it by adding the array block = getAncestorNode(ancestorIndex)->getAsBlock();
// subscript to its offset field.
TVariable *modified = CreateTempVariable(mSymbolTable, mAtomicCounterType);
TIntermDeclaration *modifiedDecl =
CreateTempInitDeclarationNode(modified, bindingOffset);
TIntermSymbol *modifiedSymbol = new TIntermSymbol(modified);
TConstantUnion *offsetFieldIndex = new TConstantUnion;
offsetFieldIndex->setIConst(1);
TIntermConstantUnion *offsetFieldRef =
new TIntermConstantUnion(offsetFieldIndex, *StaticType::GetBasic<EbtUInt>());
TIntermBinary *offsetField =
new TIntermBinary(EOpIndexDirectStruct, modifiedSymbol, offsetFieldRef);
TIntermBinary *modifiedOffset = new TIntermBinary(
EOpAddAssign, offsetField, arrayExpression->getRight()->deepCopy());
TIntermSequence *modifySequence =
new TIntermSequence({modifiedDecl, modifiedOffset});
insertStatementsInParentBlock(*modifySequence);
bindingOffset = modifiedSymbol->deepCopy();
} }
}
mRetyper.replaceFunctionCallArg(argument, bindingOffset); TIntermSequence emptySequence;
mMultiReplacements.emplace_back(block, argument, emptySequence);
}
} }
TIntermDeclaration *getAtomicCounterTypeDeclaration() { return mAtomicCounterTypeDeclaration; } TIntermDeclaration *getAtomicCounterTypeDeclaration() { return mAtomicCounterTypeDeclaration; }
...@@ -447,6 +425,152 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser ...@@ -447,6 +425,152 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser
return replacementVar; return replacementVar;
} }
TIntermTyped *convertFunctionArgumentHelper(
const TVector<unsigned int> &runningArraySizeProducts,
TIntermTyped *flattenedSubscript,
uint32_t depth,
uint32_t *subscriptCountOut)
{
std::string prefix(depth, ' ');
TIntermNode *parent = getAncestorNode(depth);
ASSERT(parent);
TIntermBinary *arrayExpression = parent->getAsBinaryNode();
if (!arrayExpression)
{
// If the parent is not an array subscript operation, we have reached the end of the
// subscript chain. Note the depth that's traversed so the corresponding node can be
// taken as the function argument.
*subscriptCountOut = depth;
return flattenedSubscript;
}
ASSERT(arrayExpression->getOp() == EOpIndexDirect ||
arrayExpression->getOp() == EOpIndexIndirect);
// Assume i = n - depth. Get Pi. See comment in convertFunctionArgument.
ASSERT(depth < runningArraySizeProducts.size());
uint32_t thisDimensionSize =
runningArraySizeProducts[runningArraySizeProducts.size() - 1 - depth];
// Get Ii.
TIntermTyped *thisDimensionOffset = arrayExpression->getRight();
TIntermConstantUnion *subscriptAsConstant = thisDimensionOffset->getAsConstantUnion();
const bool subscriptIsZero = subscriptAsConstant && subscriptAsConstant->isZero(0);
// If Ii is zero, don't need to add Ii*Pi; that's zero.
if (!subscriptIsZero)
{
thisDimensionOffset = thisDimensionOffset->deepCopy();
// If Pi is 1, don't multiply. Just accumulate Ii.
if (thisDimensionSize != 1)
{
thisDimensionOffset = new TIntermBinary(EOpMul, thisDimensionOffset,
CreateUIntConstant(thisDimensionSize));
}
// Accumulate with the previous running offset, if any.
if (flattenedSubscript)
{
flattenedSubscript =
new TIntermBinary(EOpAdd, flattenedSubscript, thisDimensionOffset);
}
else
{
flattenedSubscript = thisDimensionOffset;
}
}
// Note: GLSL only allows 2 nested levels of arrays, so this recursion is bounded.
return convertFunctionArgumentHelper(runningArraySizeProducts, flattenedSubscript,
depth + 1, subscriptCountOut);
}
TIntermNode *convertFunctionArgument(TIntermNode *symbol, TIntermTyped **bindingOffset)
{
// Assume a general case of array declaration with N dimensions:
//
// atomic_uint ac[Dn]..[D2][D1];
//
// Let's define
//
// Pn = D(n-1)*...*D2*D1
//
// In that case, we have:
//
// ac[In] = ac + In*Pn
// ac[In][I(n-1)] = ac + In*Pn + I(n-1)*P(n-1)
// ac[In]...[Ii] = ac + In*Pn + ... + Ii*Pi
//
// We have just visited a symbol; ac. Walking the parent chain, we will visit the
// expressions in the above order (ac, ac[In], ac[In][I(n-1)], ...). We therefore can
// simply walk the parent chain and accumulate Ii*Pi to obtain the offset from the base of
// ac.
TIntermSymbol *argumentAsSymbol = symbol->getAsSymbolNode();
ASSERT(argumentAsSymbol);
const TVector<unsigned int> *arraySizes = argumentAsSymbol->getType().getArraySizes();
// Calculate Pi
TVector<unsigned int> runningArraySizeProducts;
if (arraySizes && arraySizes->size() > 0)
{
runningArraySizeProducts.resize(arraySizes->size());
uint32_t runningProduct = 1;
for (size_t dimension = 0; dimension < arraySizes->size(); ++dimension)
{
runningArraySizeProducts[dimension] = runningProduct;
runningProduct *= (*arraySizes)[dimension];
}
}
// Walk the parent chain and accumulate Ii*Pi
uint32_t subscriptCount = 0;
TIntermTyped *flattenedSubscript =
convertFunctionArgumentHelper(runningArraySizeProducts, nullptr, 0, &subscriptCount);
// Find the function argument, which is either in the form of ac (i.e. there are no
// subscripts, in which case that's the function argument), or ac[In]...[Ii] (in which case
// the function argument is the (n-i)th ancestor of ac.
//
// Note that this is the case because no other operation is allowed on ac other than
// subscript.
TIntermNode *argument = subscriptCount == 0 ? symbol : getAncestorNode(subscriptCount - 1);
ASSERT(argument != nullptr);
// If not subscripted, keep the argument as-is.
if (flattenedSubscript == nullptr)
{
return argument;
}
// Copy the atomic counter binding/offset constant and modify it by adding the array
// subscript to its offset field.
TVariable *modified = CreateTempVariable(mSymbolTable, mAtomicCounterType);
TIntermDeclaration *modifiedDecl = CreateTempInitDeclarationNode(modified, *bindingOffset);
TIntermSymbol *modifiedSymbol = new TIntermSymbol(modified);
TConstantUnion *offsetFieldIndex = new TConstantUnion;
offsetFieldIndex->setIConst(1);
TIntermConstantUnion *offsetFieldRef =
new TIntermConstantUnion(offsetFieldIndex, *StaticType::GetBasic<EbtUInt>());
TIntermBinary *offsetField =
new TIntermBinary(EOpIndexDirectStruct, modifiedSymbol, offsetFieldRef);
TIntermBinary *modifiedOffset =
new TIntermBinary(EOpAddAssign, offsetField, flattenedSubscript);
TIntermSequence *modifySequence = new TIntermSequence({modifiedDecl, modifiedOffset});
insertStatementsInParentBlock(*modifySequence);
*bindingOffset = modifiedSymbol->deepCopy();
return argument;
}
void convertBuiltinFunction(TIntermAggregate *node) void convertBuiltinFunction(TIntermAggregate *node)
{ {
// If the function is |memoryBarrierAtomicCounter|, simply replace it with // If the function is |memoryBarrierAtomicCounter|, simply replace it with
......
...@@ -101,6 +101,7 @@ class RetypeOpaqueVariablesHelper ...@@ -101,6 +101,7 @@ class RetypeOpaqueVariablesHelper
// Function call arguments handling: // Function call arguments handling:
void preVisitAggregate() { mReplacedFunctionCallArgs.emplace(); } void preVisitAggregate() { mReplacedFunctionCallArgs.emplace(); }
bool isInAggregate() const { return !mReplacedFunctionCallArgs.empty(); }
void postVisitAggregate() { mReplacedFunctionCallArgs.pop(); } void postVisitAggregate() { mReplacedFunctionCallArgs.pop(); }
void replaceFunctionCallArg(const TIntermNode *oldArg, TIntermTyped *newArg) void replaceFunctionCallArg(const TIntermNode *oldArg, TIntermTyped *newArg)
{ {
......
...@@ -310,6 +310,156 @@ void main() ...@@ -310,6 +310,156 @@ void main()
} }
} }
// Test atomic counter array of array.
TEST_P(AtomicCounterBufferTest31, AtomicCounterArrayOfArray)
{
// Crashes in older drivers. http://anglebug.com/3738
ANGLE_SKIP_TEST_IF(IsOpenGL() && IsWindows() && IsAMD());
// Fails on D3D. Some counters are double-incremented while some are untouched, hinting at a
// bug in index translation. http://anglebug.com/3783
ANGLE_SKIP_TEST_IF(IsD3D11());
// Nvidia's OpenGL driver fails to compile the shader. http://anglebug.com/3791
ANGLE_SKIP_TEST_IF(IsOpenGL() && IsNVIDIA());
// Intel's Windows OpenGL driver crashes in this test. http://anglebug.com/3791
ANGLE_SKIP_TEST_IF(IsOpenGL() && IsIntel() && IsWindows());
// Skipping due to a bug on the Qualcomm Vulkan Android driver.
// http://anglebug.com/3726
ANGLE_SKIP_TEST_IF(IsAndroid() && IsVulkan());
constexpr char kCS[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
layout(binding = 0) uniform atomic_uint ac[7][5][3];
void f0(in atomic_uint ac)
{
atomicCounterIncrement(ac);
}
void f1(in atomic_uint ac[3])
{
atomicCounterIncrement(ac[0]);
f0(ac[1]);
int index = 2;
f0(ac[index]);
}
void f2(in atomic_uint ac[5][3])
{
// Increment all in ac[0], ac[1] and ac[2]
for (int i = 0; i < 3; ++i)
{
for (int j = 0; j < 2; ++j)
{
f0(ac[i][j]);
}
f0(ac[i][2]);
}
// Increment all in ac[3]
f1(ac[3]);
// Increment all in ac[4]
for (int i = 0; i < 2; ++i)
{
atomicCounterIncrement(ac[4][i]);
}
f0(ac[4][2]);
}
void f3(in atomic_uint ac[7][5][3])
{
// Increment all in ac[0], ac[1], ac[2] and ac[3]
f2(ac[0]);
for (int i = 1; i < 4; ++i)
{
f2(ac[i]);
}
// Increment all in ac[5][0], ac[5][1], ac[5][2] and ac[5][3]
for (int i = 0; i < 4; ++i)
{
f1(ac[5][i]);
}
// Increment all in ac[5][4][0], ac[5][4][1] and ac[5][4][2]
f0(ac[5][4][0]);
for (int i = 1; i < 3; ++i)
{
f0(ac[5][4][i]);
}
// Increment all in ac[6]
for (int i = 0; i < 5; ++i)
{
for (int j = 0; j < 2; ++j)
{
atomicCounterIncrement(ac[6][i][j]);
}
atomicCounterIncrement(ac[6][i][2]);
}
}
void main()
{
// Increment all in ac except ac[4]
f3(ac);
// Increment all in ac[4]
f2(ac[4]);
})";
constexpr uint32_t kAtomicCounterRows = 7;
constexpr uint32_t kAtomicCounterCols = 5;
constexpr uint32_t kAtomicCounterDepth = 3;
constexpr uint32_t kAtomicCounterCount =
kAtomicCounterRows * kAtomicCounterCols * kAtomicCounterDepth;
GLint maxAtomicCounters = 0;
glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTERS, &maxAtomicCounters);
EXPECT_GL_NO_ERROR();
// Required minimum is 8 by the spec
EXPECT_GE(maxAtomicCounters, 8);
ANGLE_SKIP_TEST_IF(static_cast<uint32_t>(maxAtomicCounters) < kAtomicCounterCount);
ANGLE_GL_COMPUTE_PROGRAM(program, kCS);
glUseProgram(program.get());
// The initial value of atomic counters is 0, 1, 2, ...
unsigned int bufferData[kAtomicCounterCount] = {};
for (uint32_t index = 0; index < kAtomicCounterCount; ++index)
{
bufferData[index] = index;
}
GLBuffer atomicCounterBuffer;
glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(bufferData), bufferData, GL_STATIC_DRAW);
glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicCounterBuffer);
glDispatchCompute(1, 1, 1);
EXPECT_GL_NO_ERROR();
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
unsigned int result[kAtomicCounterCount] = {};
glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
void *mappedBuffer =
glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(bufferData), GL_MAP_READ_BIT);
memcpy(result, mappedBuffer, sizeof(bufferData));
glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
for (uint32_t index = 0; index < kAtomicCounterCount; ++index)
{
EXPECT_EQ(result[index], bufferData[index] + 1) << "index " << index;
}
}
ANGLE_INSTANTIATE_TEST(AtomicCounterBufferTest, ANGLE_INSTANTIATE_TEST(AtomicCounterBufferTest,
ES3_OPENGL(), ES3_OPENGL(),
ES3_OPENGLES(), ES3_OPENGLES(),
......
...@@ -703,9 +703,6 @@ void main() ...@@ -703,9 +703,6 @@ void main()
// Draw an array of points with the first vertex offset at 0 using gl_VertexID // Draw an array of points with the first vertex offset at 0 using gl_VertexID
TEST_P(GLSLTest_ES3, GLVertexIDOffsetZeroDrawArray) TEST_P(GLSLTest_ES3, GLVertexIDOffsetZeroDrawArray)
{ {
// TODO(syoussefi): missing ES3 shader feature support. http://anglebug.com/3221
ANGLE_SKIP_TEST_IF(IsVulkan());
constexpr int kStartIndex = 0; constexpr int kStartIndex = 0;
constexpr int kArrayLength = 5; constexpr int kArrayLength = 5;
constexpr char kVS[] = R"(#version 300 es constexpr char kVS[] = R"(#version 300 es
...@@ -768,9 +765,6 @@ void GLVertexIDIntegerTextureDrawArrays_helper(int first, int count, GLenum err) ...@@ -768,9 +765,6 @@ void GLVertexIDIntegerTextureDrawArrays_helper(int first, int count, GLenum err)
// https://github.com/KhronosGroup/WebGL/blob/master/sdk/tests/conformance2/rendering/vertex-id.html // https://github.com/KhronosGroup/WebGL/blob/master/sdk/tests/conformance2/rendering/vertex-id.html
TEST_P(GLSLTest_ES3, GLVertexIDIntegerTextureDrawArrays) TEST_P(GLSLTest_ES3, GLVertexIDIntegerTextureDrawArrays)
{ {
// TODO(syoussefi): missing ES3 shader feature support. http://anglebug.com/3221
ANGLE_SKIP_TEST_IF(IsVulkan());
// Have to set a large point size because the window size is much larger than the texture // Have to set a large point size because the window size is much larger than the texture
constexpr char kVS[] = R"(#version 300 es constexpr char kVS[] = R"(#version 300 es
flat out highp int vVertexID; flat out highp int vVertexID;
...@@ -829,9 +823,6 @@ TEST_P(GLSLTest_ES3, GLVertexIDOffsetFiveDrawArray) ...@@ -829,9 +823,6 @@ TEST_P(GLSLTest_ES3, GLVertexIDOffsetFiveDrawArray)
// Bug in Nexus drivers, offset does not work. (anglebug.com/3264) // Bug in Nexus drivers, offset does not work. (anglebug.com/3264)
ANGLE_SKIP_TEST_IF((IsNexus5X() || IsNexus6P()) && IsOpenGLES()); ANGLE_SKIP_TEST_IF((IsNexus5X() || IsNexus6P()) && IsOpenGLES());
// TODO(syoussefi): missing ES3 shader feature support. http://anglebug.com/3221
ANGLE_SKIP_TEST_IF(IsVulkan());
constexpr int kStartIndex = 5; constexpr int kStartIndex = 5;
constexpr int kArrayLength = 5; constexpr int kArrayLength = 5;
constexpr char kVS[] = R"(#version 300 es constexpr char kVS[] = R"(#version 300 es
...@@ -2891,6 +2882,104 @@ TEST_P(GLSLTest_ES3, WriteIntoDynamicIndexingOfSwizzledVector) ...@@ -2891,6 +2882,104 @@ TEST_P(GLSLTest_ES3, WriteIntoDynamicIndexingOfSwizzledVector)
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green); EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
} }
// Test that the length() method is correctly translated in Vulkan atomic counter buffer emulation.
TEST_P(GLSLTest_ES31, AtomicCounterArrayLength)
{
// Crashes on an assertion. The driver reports no atomic counter buffers when queried from the
// program, but ANGLE believes there to be one.
//
// This is likely due to the fact that ANGLE generates the following code, as a side effect of
// the code on which .length() is being called:
//
// _uac1[(_uvalue = _utestSideEffectValue)];
//
// The driver is optimizing the subscription out, and calling the atomic counter inactive. This
// was observed on nvidia, mesa and amd/windows.
//
// The fix would be for ANGLE to skip uniforms it believes should exist, but when queried, the
// driver says don't.
//
// http://anglebug.com/3782
ANGLE_SKIP_TEST_IF(IsOpenGL());
// Skipping due to a bug on the Qualcomm Vulkan Android driver.
// http://anglebug.com/3726
ANGLE_SKIP_TEST_IF(IsAndroid() && IsVulkan());
constexpr char kCS[] = R"(#version 310 es
precision mediump float;
layout(local_size_x=1) in;
layout(binding = 0) uniform atomic_uint ac1[2][3];
uniform uint testSideEffectValue;
layout(binding = 1, std140) buffer Result
{
uint value;
} result;
void main() {
bool passed = true;
if (ac1.length() != 2)
{
passed = false;
}
uint value = 0u;
if (ac1[(value = testSideEffectValue)].length() != 3)
{
passed = false;
}
if (value != testSideEffectValue)
{
passed = false;
}
result.value = passed ? 255u : 127u;
})";
constexpr unsigned int kUniformTestValue = 17;
constexpr unsigned int kExpectedSuccessValue = 255;
constexpr unsigned int kAtomicCounterRows = 2;
constexpr unsigned int kAtomicCounterCols = 3;
GLint maxAtomicCounters = 0;
glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTERS, &maxAtomicCounters);
EXPECT_GL_NO_ERROR();
// Required minimum is 8 by the spec
EXPECT_GE(maxAtomicCounters, 8);
ANGLE_SKIP_TEST_IF(static_cast<uint32_t>(maxAtomicCounters) <
kAtomicCounterRows * kAtomicCounterCols);
ANGLE_GL_COMPUTE_PROGRAM(program, kCS);
glUseProgram(program.get());
constexpr unsigned int kBufferData[kAtomicCounterRows * kAtomicCounterCols] = {};
GLBuffer atomicCounterBuffer;
glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(kBufferData), kBufferData, GL_STATIC_DRAW);
glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicCounterBuffer);
constexpr unsigned int kOutputInitValue = 0;
GLBuffer shaderStorageBuffer;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, shaderStorageBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(kOutputInitValue), &kOutputInitValue,
GL_STATIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, shaderStorageBuffer);
GLint uniformLocation = glGetUniformLocation(program.get(), "testSideEffectValue");
EXPECT_NE(uniformLocation, -1);
glUniform1i(uniformLocation, kUniformTestValue);
glDispatchCompute(1, 1, 1);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
const GLuint *ptr = reinterpret_cast<const GLuint *>(
glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint), GL_MAP_READ_BIT));
EXPECT_EQ(*ptr, kExpectedSuccessValue);
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
// Test that array indices for arrays of arrays work as expected. // Test that array indices for arrays of arrays work as expected.
TEST_P(GLSLTest_ES31, ArraysOfArrays) TEST_P(GLSLTest_ES31, ArraysOfArrays)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment