Commit b82d8633 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Atomic counter buffer support

Vulkan doesn't treat atomic counters specially, and they are emulated with atomic access to storage buffers. A single atomic counter buffer binding per pipeline is supported. All the atomic counters identify an offset within this buffer. The shader is modified to include a storage buffer definition with `uint counters[];` as the only field. A compiler pass replaces atomic counter definitions with variables that hold the corresponding offset parameter, as well as changing atomic_uint types to just uint (as the offset). Where an atomic counter variable is used, it is replaced with the offset variable (plus the array index, if array). At the same time, built-in `atomicCounter*` functions are replaced with a corresponding `atomic*` function and `memoryBarrierAtomicCounter` is replaced with `memoryBarrierBuffer`. Bug: angleproject:3566 Change-Id: Iefb3d47de6a5cb3072bfa0cb94a46ac6a886d369 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1704635 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Jamie Madill <jmadill@chromium.org> Reviewed-by: Tim Van Patten <timvp@google.com>
parent bd4ff479
......@@ -152,6 +152,8 @@ angle_translator_sources = [
"src/compiler/translator/tree_ops/RemovePow.h",
"src/compiler/translator/tree_ops/RemoveUnreferencedVariables.cpp",
"src/compiler/translator/tree_ops/RemoveUnreferencedVariables.h",
"src/compiler/translator/tree_ops/RewriteAtomicCounters.cpp",
"src/compiler/translator/tree_ops/RewriteAtomicCounters.h",
"src/compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.cpp",
"src/compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.h",
"src/compiler/translator/tree_ops/RewriteDfdy.cpp",
......
......@@ -693,6 +693,11 @@ inline bool IsQualifierUnspecified(TQualifier qualifier)
return (qualifier == EvqTemporary || qualifier == EvqGlobal);
}
// Returns true when |qualifier| denotes a shader storage buffer block (the
// "buffer" qualifier).
inline bool IsStorageBuffer(TQualifier qualifier)
{
    return (qualifier == EvqBuffer);
}
enum TLayoutImageInternalFormat
{
EiifUnspecified,
......
......@@ -214,31 +214,31 @@ std::string TOutputGLSLBase::getMemoryQualifiers(const TType &type)
const TMemoryQualifier &memoryQualifier = type.getMemoryQualifier();
if (memoryQualifier.readonly)
{
ASSERT(IsImage(type.getBasicType()));
ASSERT(IsImage(type.getBasicType()) || IsStorageBuffer(type.getQualifier()));
out << "readonly ";
}
if (memoryQualifier.writeonly)
{
ASSERT(IsImage(type.getBasicType()));
ASSERT(IsImage(type.getBasicType()) || IsStorageBuffer(type.getQualifier()));
out << "writeonly ";
}
if (memoryQualifier.coherent)
{
ASSERT(IsImage(type.getBasicType()));
ASSERT(IsImage(type.getBasicType()) || IsStorageBuffer(type.getQualifier()));
out << "coherent ";
}
if (memoryQualifier.restrictQualifier)
{
ASSERT(IsImage(type.getBasicType()));
ASSERT(IsImage(type.getBasicType()) || IsStorageBuffer(type.getQualifier()));
out << "restrict ";
}
if (memoryQualifier.volatileQualifier)
{
ASSERT(IsImage(type.getBasicType()));
ASSERT(IsImage(type.getBasicType()) || IsStorageBuffer(type.getQualifier()));
out << "volatile ";
}
......
......@@ -84,7 +84,10 @@ void TOutputVulkanGLSL::writeLayoutQualifier(TIntermTyped *variable)
storage = EbsStd140;
}
blockStorage = getBlockStorageString(storage);
if (interfaceBlock->blockStorage() != EbsUnspecified)
{
blockStorage = getBlockStorageString(storage);
}
}
// Specify matrix packing if necessary.
......
......@@ -17,6 +17,7 @@
#include "compiler/translator/OutputVulkanGLSL.h"
#include "compiler/translator/StaticType.h"
#include "compiler/translator/tree_ops/NameEmbeddedUniformStructs.h"
#include "compiler/translator/tree_ops/RewriteAtomicCounters.h"
#include "compiler/translator/tree_ops/RewriteDfdy.h"
#include "compiler/translator/tree_ops/RewriteStructSamplers.h"
#include "compiler/translator/tree_util/BuiltIn_autogen.h"
......@@ -151,6 +152,14 @@ class DeclareDefaultUniformsTraverser : public TIntermTraverser
bool mInDefaultUniform;
};
// Builds an AST constant node holding the single float |value|.
TIntermConstantUnion *CreateFloatConstant(float value)
{
    TConstantUnion *valueUnion = new TConstantUnion;
    valueUnion->setFConst(value);

    const TType &floatType = *StaticType::GetBasic<EbtFloat, 1>();
    return new TIntermConstantUnion(valueUnion, floatType);
}
constexpr ImmutableString kFlippedPointCoordName = ImmutableString("flippedPointCoord");
constexpr ImmutableString kFlippedFragCoordName = ImmutableString("flippedFragCoord");
constexpr ImmutableString kEmulatedDepthRangeParams = ImmutableString("ANGLEDepthRangeParams");
......@@ -168,18 +177,6 @@ constexpr std::array<const char *, kNumDriverUniforms> kDriverUniformNames = {
{kViewport, kHalfRenderAreaHeight, kViewportYScale, kNegViewportYScale, kXfbActiveUnpaused,
kXfbBufferOffsets, kDepthRange}};
// Creates a constant node of |BasicType| with |PrimarySize| components, all set to |value|.
//
// NOTE(review): the components are always stored with setFConst (i.e. as floats), regardless of
// |BasicType|; instantiating this with a non-float basic type would store the wrong union member.
// Presumably only the default EbtFloat instantiation is ever used — confirm before generalizing.
template <TBasicType BasicType = EbtFloat, unsigned char PrimarySize = 1>
TIntermConstantUnion *CreateBasicConstant(float value)
{
    const TType *constantType = StaticType::GetBasic<BasicType, PrimarySize>();
    TConstantUnion *constantValue = new TConstantUnion[PrimarySize];
    // Replicate |value| into every component.
    for (unsigned char sizeIndex = 0; sizeIndex < PrimarySize; ++sizeIndex)
    {
        constantValue[sizeIndex].setFConst(value);
    }
    return new TIntermConstantUnion(constantValue, *constantType);
}
size_t FindFieldIndex(const TFieldList &fieldList, const char *fieldName)
{
for (size_t fieldIndex = 0; fieldIndex < fieldList.size(); ++fieldIndex)
......@@ -297,7 +294,7 @@ void AppendVertexShaderDepthCorrectionToMain(TIntermBlock *root, TSymbolTable *s
TIntermSwizzle *positionZ = new TIntermSwizzle(positionRef, swizzleOffsetZ);
// Create a constant "0.5"
TIntermConstantUnion *oneHalf = CreateBasicConstant(0.5f);
TIntermConstantUnion *oneHalf = CreateFloatConstant(0.5f);
// Create a swizzle to "gl_Position.w"
TVector<int> swizzleOffsetW;
......@@ -376,29 +373,9 @@ const TVariable *AddDriverUniformsToShader(TIntermBlock *root, TSymbolTable *sym
driverFieldList->push_back(driverUniformField);
}
// Define a driver uniform block "ANGLEUniformBlock".
TLayoutQualifier driverLayoutQualifier = TLayoutQualifier::Create();
TInterfaceBlock *interfaceBlock =
new TInterfaceBlock(symbolTable, ImmutableString("ANGLEUniformBlock"), driverFieldList,
driverLayoutQualifier, SymbolType::AngleInternal);
// Make the inteface block into a declaration. Use instance name "ANGLEUniforms".
TType *interfaceBlockType = new TType(interfaceBlock, EvqUniform, driverLayoutQualifier);
TIntermDeclaration *driverUniformsDecl = new TIntermDeclaration;
TVariable *driverUniformsVar = new TVariable(symbolTable, ImmutableString("ANGLEUniforms"),
interfaceBlockType, SymbolType::AngleInternal);
TIntermSymbol *driverUniformsDeclarator = new TIntermSymbol(driverUniformsVar);
driverUniformsDecl->appendDeclarator(driverUniformsDeclarator);
// Insert the declarations before first function, since functions before main() may refer to
// these values.
TIntermSequence *insertSequence = new TIntermSequence;
insertSequence->push_back(driverUniformsDecl);
size_t firstFunctionIndex = FindFirstFunctionDefinitionIndex(root);
root->insertChildNodes(firstFunctionIndex, *insertSequence);
return driverUniformsVar;
// Define a driver uniform block "ANGLEUniformBlock" with instance name "ANGLEUniforms".
return DeclareInterfaceBlock(root, symbolTable, driverFieldList, EvqUniform,
TMemoryQualifier::Create(), "ANGLEUniformBlock", "ANGLEUniforms");
}
TIntermPreprocessorDirective *GenerateLineRasterIfDef()
......@@ -524,7 +501,7 @@ void AddLineSegmentRasterizationEmulation(TInfoSinkBase &sink,
TIntermBinary *positionNDC = new TIntermBinary(EOpDiv, positionXY, positionW);
// ANGLEPosition * 0.5
TIntermConstantUnion *oneHalf = CreateBasicConstant(0.5f);
TIntermConstantUnion *oneHalf = CreateFloatConstant(0.5f);
TIntermBinary *halfPosition = new TIntermBinary(EOpVectorTimesScalar, positionNDC, oneHalf);
// (ANGLEPosition * 0.5) + 0.5
......@@ -562,7 +539,7 @@ void AddLineSegmentRasterizationEmulation(TInfoSinkBase &sink,
TIntermBinary *baSq = new TIntermBinary(EOpMul, ba, ba->deepCopy());
// 2.0 * ba * ba
TIntermTyped *two = CreateBasicConstant(2.0f);
TIntermTyped *two = CreateFloatConstant(2.0f);
TIntermBinary *twoBaSq = new TIntermBinary(EOpVectorTimesScalar, baSq, two);
// Assign to a temporary "ba2".
......@@ -593,7 +570,7 @@ void AddLineSegmentRasterizationEmulation(TInfoSinkBase &sink,
// Using a small epsilon value ensures that we don't suffer from numerical instability when
// lines are exactly vertical or horizontal.
static constexpr float kEpisilon = 0.00001f;
TIntermConstantUnion *epsilon = CreateBasicConstant(kEpisilon);
TIntermConstantUnion *epsilon = CreateFloatConstant(kEpisilon);
// bp.x > epsilon
TIntermBinary *checkX = new TIntermBinary(EOpGreaterThan, bpX, epsilon);
......@@ -654,6 +631,7 @@ void TranslatorVulkan::translate(TIntermBlock *root,
// Write out default uniforms into a uniform block assigned to a specific set/binding.
int defaultUniformCount = 0;
int structTypesUsedForUniforms = 0;
int atomicCounterCount = 0;
for (const auto &uniform : getUniforms())
{
if (!uniform.isBuiltIn() && uniform.staticUse && !gl::IsOpaqueType(uniform.type))
......@@ -665,6 +643,11 @@ void TranslatorVulkan::translate(TIntermBlock *root,
{
++structTypesUsedForUniforms;
}
if (gl::IsAtomicCounterType(uniform.type))
{
++atomicCounterCount;
}
}
// TODO(lucferron): Refactor this function to do less tree traversals.
......@@ -692,6 +675,11 @@ void TranslatorVulkan::translate(TIntermBlock *root,
sink << "};\n";
}
if (atomicCounterCount > 0)
{
RewriteAtomicCounters(root, &getSymbolTable());
}
const TVariable *driverUniforms = nullptr;
if (getShaderType() != GL_COMPUTE_SHADER)
{
......@@ -762,7 +750,7 @@ void TranslatorVulkan::translate(TIntermBlock *root,
{
TIntermBinary *viewportYScale =
CreateDriverUniformRef(driverUniforms, kNegViewportYScale);
TIntermConstantUnion *pivot = CreateBasicConstant(0.5f);
TIntermConstantUnion *pivot = CreateFloatConstant(0.5f);
FlipBuiltinVariable(root, GetMainSequence(root), viewportYScale, &getSymbolTable(),
BuiltInVariable::gl_PointCoord(), kFlippedPointCoordName, pivot);
}
......
......@@ -329,6 +329,7 @@ class TType
void realize();
bool isSampler() const { return IsSampler(type); }
bool isAtomicCounter() const { return IsAtomicCounter(type); }
private:
void invalidateMangledName();
......
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RewriteAtomicCounters: Emulate atomic counter buffers with storage buffers.
//
#include "compiler/translator/tree_ops/RewriteAtomicCounters.h"
#include "compiler/translator/ImmutableStringBuilder.h"
#include "compiler/translator/StaticType.h"
#include "compiler/translator/SymbolTable.h"
#include "compiler/translator/tree_util/IntermNode_util.h"
#include "compiler/translator/tree_util/IntermTraverse.h"
namespace sh
{
namespace
{
// Declares the storage buffer that backs all emulated atomic counters and inserts its
// declaration into |root|.  Returns the interface block instance variable.
const TVariable *DeclareAtomicCountersBuffer(TIntermBlock *root, TSymbolTable *symbolTable)
{
    // The block's single field is `uint counters[];` (an unsized array of uints).
    TType *counterType = new TType(EbtUInt);
    counterType->makeArray(0);

    TField *countersField = new TField(counterType, ImmutableString("counters"), TSourceLoc(),
                                       SymbolType::AngleInternal);

    TFieldList *fieldList = new TFieldList;
    fieldList->push_back(countersField);

    // Counter accesses must be visible across invocations, so mark the block coherent.
    TMemoryQualifier coherentMemory = TMemoryQualifier::Create();
    coherentMemory.coherent = true;

    // Declare a storage block "ANGLEAtomicCounters" with instance name "atomicCounters".
    return DeclareInterfaceBlock(root, symbolTable, fieldList, EvqBuffer, coherentMemory,
                                 "ANGLEAtomicCounters", "atomicCounters");
}
// Builds the expression |atomicCounters.counters[offset]|: field 0 of the interface block
// instance, indexed by |offset|.
TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTyped *offset)
{
    // Index of the (only) field, |counters|, within the block.
    TConstantUnion *zero = new TConstantUnion;
    zero->setIConst(0);
    TIntermConstantUnion *fieldIndexNode =
        new TIntermConstantUnion(zero, *StaticType::GetBasic<EbtUInt>());

    TIntermSymbol *blockSymbol = new TIntermSymbol(atomicCounters);
    TIntermBinary *countersArray =
        new TIntermBinary(EOpIndexDirectInterfaceBlock, blockSymbol, fieldIndexNode);

    return new TIntermBinary(EOpIndexDirect, countersArray, offset);
}
// Builds an AST constant node holding the single uint |value|.
TIntermConstantUnion *CreateUIntConstant(uint32_t value)
{
    TConstantUnion *valueUnion = new TConstantUnion;
    valueUnion->setUConst(value);

    const TType &uintType = *StaticType::GetBasic<EbtUInt, 1>();
    return new TIntermConstantUnion(valueUnion, uintType);
}
// Traverser that:
//
// 1. Converts the |atomic_uint| types to |uint|.
// 2. Substitutes the |uniform atomic_uint| declarations with a global declaration that holds the
//    offset.
// 3. Substitutes |atomicVar[n]| with |offset + n|.
class RewriteAtomicCountersTraverser : public TIntermTraverser
{
  public:
    // |atomicCounters| is the interface block instance variable created by
    // DeclareAtomicCountersBuffer; all counter accesses are rewritten to index into it.
    RewriteAtomicCountersTraverser(TSymbolTable *symbolTable, const TVariable *atomicCounters)
        : TIntermTraverser(true, true, true, symbolTable),
          mAtomicCounters(atomicCounters),
          mCurrentAtomicCounterOffset(0),
          mCurrentAtomicCounterDecl(nullptr),
          mCurrentAtomicCounterDeclParent(nullptr)
    {}

    bool visitDeclaration(Visit visit, TIntermDeclaration *node) override
    {
        const TIntermSequence &sequence = *(node->getSequence());

        TIntermTyped *variable = sequence.front()->getAsTyped();
        const TType &type = variable->getType();
        bool isAtomicCounter = type.getQualifier() == EvqUniform && type.isAtomicCounter();

        if (visit == PreVisit || visit == InVisit)
        {
            if (isAtomicCounter)
            {
                // We only support one atomic counter buffer, so the binding should necessarily be
                // 0.
                ASSERT(type.getLayoutQualifier().binding == 0);

                // Record the declaration; visitSymbol uses this state to know the symbol it sees
                // is the counter being *declared* (not referenced) and what its offset is.
                mCurrentAtomicCounterDecl = node;
                mCurrentAtomicCounterDeclParent = getParentNode()->getAsBlock();
                mCurrentAtomicCounterOffset = type.getLayoutQualifier().offset;
            }
        }
        else if (visit == PostVisit)
        {
            // Leaving the declaration: clear the per-declaration state.
            mCurrentAtomicCounterDecl = nullptr;
            mCurrentAtomicCounterDeclParent = nullptr;
            mCurrentAtomicCounterOffset = 0;
        }
        return true;
    }

    void visitFunctionPrototype(TIntermFunctionPrototype *node) override
    {
        const TFunction *function = node->getFunction();

        // Go over the parameters and replace the atomic arguments with a uint type. If this is
        // the function definition, keep the replaced variable for future encounters.
        mAtomicCounterFunctionParams.clear();
        for (size_t paramIndex = 0; paramIndex < function->getParamCount(); ++paramIndex)
        {
            const TVariable *param = function->getParam(paramIndex);
            TVariable *replacement = convertFunctionParameter(node, param);
            if (replacement)
            {
                mAtomicCounterFunctionParams[param] = replacement;
            }
        }

        // Nothing to rewrite if no parameter was an atomic counter.
        if (mAtomicCounterFunctionParams.empty())
        {
            return;
        }

        // Create a new function prototype and replace this with it.
        TFunction *replacementFunction = new TFunction(
            mSymbolTable, function->name(), SymbolType::UserDefined,
            new TType(function->getReturnType()), function->isKnownToNotHaveSideEffects());
        for (size_t paramIndex = 0; paramIndex < function->getParamCount(); ++paramIndex)
        {
            const TVariable *param = function->getParam(paramIndex);
            TVariable *replacement = nullptr;
            if (param->getType().isAtomicCounter())
            {
                ASSERT(mAtomicCounterFunctionParams.count(param) != 0);
                replacement = mAtomicCounterFunctionParams[param];
            }
            else
            {
                // Non-atomic parameters are carried over unchanged (as fresh TVariables).
                replacement = new TVariable(mSymbolTable, param->name(),
                                            new TType(param->getType()), SymbolType::UserDefined);
            }
            replacementFunction->addParameter(replacement);
        }

        TIntermFunctionPrototype *replacementPrototype =
            new TIntermFunctionPrototype(replacementFunction);
        queueReplacement(replacementPrototype, OriginalNode::IS_DROPPED);

        // Remember the replacement so calls to this function can be rewritten too.
        mReplacedFunctions[function] = replacementFunction;
    }

    bool visitAggregate(Visit visit, TIntermAggregate *node) override
    {
        // The call-argument map is per-call; reset it on the way into a node.
        if (visit == PreVisit)
        {
            mAtomicCounterFunctionCallArgs.clear();
        }

        // Rewriting is done on the way out, after visitSymbol has populated
        // |mAtomicCounterFunctionCallArgs| for the call's arguments.
        if (visit != PostVisit)
        {
            return true;
        }

        if (node->getOp() == EOpCallBuiltInFunction)
        {
            convertBuiltinFunction(node);
        }
        else if (node->getOp() == EOpCallFunctionInAST)
        {
            convertASTFunction(node);
        }

        return true;
    }

    void visitSymbol(TIntermSymbol *symbol) override
    {
        const TVariable *symbolVariable = &symbol->variable();

        // If we're inside an atomic counter declaration, this symbol names the counter being
        // declared; emit the offset variable that replaces it.
        if (mCurrentAtomicCounterDecl)
        {
            declareAtomicCounter(symbolVariable);
            return;
        }

        if (!symbol->getType().isAtomicCounter())
        {
            return;
        }

        // The symbol is either referencing a global atomic counter, or is a function parameter. In
        // either case, it could be an array. There are the following possibilities:
        //
        //     layout(..) uniform atomic_uint ac;
        //     layout(..) uniform atomic_uint acArray[N];
        //
        //     void func(inout atomic_uint c)
        //     {
        //         otherFunc(c);
        //     }
        //
        //     void funcArray(inout atomic_uint cArray[N])
        //     {
        //         otherFuncArray(cArray);
        //         otherFunc(cArray[n]);
        //     }
        //
        //     void funcGlobal()
        //     {
        //         func(ac);
        //         func(acArray[n]);
        //         funcArray(acArray);
        //         atomicIncrement(ac);
        //         atomicIncrement(acArray[n]);
        //     }
        //
        // This should translate to:
        //
        //     buffer ANGLEAtomicCounters
        //     {
        //         uint counters[];
        //     } atomicCounters;
        //
        //     const uint ac = <offset>;
        //     const uint acArray = <offset>;
        //
        //     void func(inout uint c)
        //     {
        //         otherFunc(c);
        //     }
        //
        //     void funcArray(inout uint cArray)
        //     {
        //         otherFuncArray(cArray);
        //         otherFunc(cArray + n);
        //     }
        //
        //     void funcGlobal()
        //     {
        //         func(ac);
        //         func(acArray+n);
        //         funcArray(acArray);
        //         atomicAdd(atomicCounters.counters[ac]);
        //         atomicAdd(atomicCounters.counters[ac+n]);
        //     }
        //
        // In all cases, the argument transformation is stored in |mAtomicCounterFunctionCallArgs|.
        // In the function call's PostVisit, if it's a builtin, the look up in
        // |atomicCounters.counters| is done as well as the builtin function change.  Otherwise,
        // the transformed argument is passed on as is.
        //
        TIntermTyped *offset = nullptr;
        if (mAtomicCounterOffsets.count(symbolVariable) != 0)
        {
            // Global atomic counter: reference its offset variable.
            offset = new TIntermSymbol(mAtomicCounterOffsets[symbolVariable]);
        }
        else
        {
            // Otherwise it must be a function parameter already replaced with a uint offset.
            ASSERT(mAtomicCounterFunctionParams.count(symbolVariable) != 0);
            offset = new TIntermSymbol(mAtomicCounterFunctionParams[symbolVariable]);
        }

        TIntermNode *argument = symbol;

        TIntermNode *parent = getParentNode();
        ASSERT(parent);
        TIntermBinary *arrayExpression = parent->getAsBinaryNode();
        if (arrayExpression)
        {
            ASSERT(arrayExpression->getOp() == EOpIndexDirect ||
                   arrayExpression->getOp() == EOpIndexIndirect);

            // |acArray[n]| becomes |offset + n|; the whole index expression is the call argument.
            offset = new TIntermBinary(EOpAdd, offset, arrayExpression->getRight()->deepCopy());
            argument = arrayExpression;
        }

        mAtomicCounterFunctionCallArgs[argument] = offset;
    }

  private:
    // Replaces a |uniform atomic_uint| declaration with a |const uint| global holding the
    // counter's offset (in uints) into the shared counters array.
    void declareAtomicCounter(const TVariable *symbolVariable)
    {
        // Create a global variable that contains the offset of this atomic counter declaration.
        TType *uintType = new TType(*StaticType::GetBasic<EbtUInt, 1>());
        uintType->setQualifier(EvqConst);
        TVariable *offset =
            new TVariable(mSymbolTable, symbolVariable->name(), uintType, SymbolType::UserDefined);

        // Layout offsets are in bytes, while the counters array is of uints; hence the /4.
        ASSERT(mCurrentAtomicCounterOffset % 4 == 0);
        TIntermConstantUnion *offsetInitValue = CreateIndexNode(mCurrentAtomicCounterOffset / 4);

        TIntermSymbol *offsetSymbol = new TIntermSymbol(offset);
        TIntermBinary *offsetInit = new TIntermBinary(EOpInitialize, offsetSymbol, offsetInitValue);

        TIntermDeclaration *offsetDeclaration = new TIntermDeclaration();
        offsetDeclaration->appendDeclarator(offsetInit);

        // Replace the atomic_uint declaration with the offset declaration.
        TIntermSequence replacement;
        replacement.push_back(offsetDeclaration);
        mMultiReplacements.emplace_back(mCurrentAtomicCounterDeclParent, mCurrentAtomicCounterDecl,
                                        replacement);

        // Remember the offset variable.
        mAtomicCounterOffsets[symbolVariable] = offset;
    }

    // Returns a fresh |uint| variable to replace an atomic_uint parameter, or nullptr if |param|
    // is not an atomic counter.
    TVariable *convertFunctionParameter(TIntermNode *parent, const TVariable *param)
    {
        if (!param->getType().isAtomicCounter())
        {
            return nullptr;
        }
        const TType *newType = StaticType::GetBasic<EbtUInt>();
        TVariable *replacementVar =
            new TVariable(mSymbolTable, param->name(), newType, SymbolType::UserDefined);
        return replacementVar;
    }

    // Rewrites calls to atomic counter builtins into their storage buffer equivalents.
    void convertBuiltinFunction(TIntermAggregate *node)
    {
        // If the function is |memoryBarrierAtomicCounter|, simply replace it with
        // |memoryBarrierBuffer|.
        if (node->getFunction()->name() == "memoryBarrierAtomicCounter")
        {
            TIntermTyped *substituteCall = CreateBuiltInFunctionCallNode(
                "memoryBarrierBuffer", new TIntermSequence, *mSymbolTable, 310);
            queueReplacement(substituteCall, OriginalNode::IS_DROPPED);
            return;
        }

        // If it's an |atomicCounter*| function, replace the function with an |atomic*| equivalent.
        if (!node->getFunction()->isAtomicCounterFunction())
        {
            return;
        }

        const ImmutableString &functionName = node->getFunction()->name();
        TIntermSequence *arguments = node->getSequence();

        // Note: atomicAdd(0) is used for atomic reads.
        uint32_t valueChange = 0;
        constexpr char kAtomicAddFunction[] = "atomicAdd";
        bool isDecrement = false;

        if (functionName == "atomicCounterIncrement")
        {
            valueChange = 1;
        }
        else if (functionName == "atomicCounterDecrement")
        {
            // uint values are required to wrap around, so 0xFFFFFFFFu is used as -1.
            valueChange = std::numeric_limits<uint32_t>::max();
            static_assert(static_cast<uint32_t>(-1) == std::numeric_limits<uint32_t>::max(),
                          "uint32_t max is not -1");

            isDecrement = true;
        }
        else
        {
            ASSERT(functionName == "atomicCounter");
        }

        // The argument was transformed into an offset expression by visitSymbol.
        const TIntermNode *param = (*arguments)[0];
        ASSERT(mAtomicCounterFunctionCallArgs.count(param) != 0);

        TIntermTyped *offset = mAtomicCounterFunctionCallArgs[param];

        TIntermSequence *substituteArguments = new TIntermSequence;
        substituteArguments->push_back(CreateAtomicCounterRef(mAtomicCounters, offset));
        substituteArguments->push_back(CreateUIntConstant(valueChange));

        TIntermTyped *substituteCall = CreateBuiltInFunctionCallNode(
            kAtomicAddFunction, substituteArguments, *mSymbolTable, 310);

        // Note that atomicCounterDecrement returns the *new* value instead of the prior value,
        // unlike atomicAdd.  So we need to do a -1 on the result as well.
        if (isDecrement)
        {
            substituteCall = new TIntermBinary(EOpSub, substituteCall, CreateUIntConstant(1));
        }

        queueReplacement(substituteCall, OriginalNode::IS_DROPPED);
    }

    // Rewrites a call to a user-defined function whose prototype was replaced, substituting the
    // transformed (uint offset) arguments for atomic_uint ones.
    void convertASTFunction(TIntermAggregate *node)
    {
        // See if the function needs replacement at all.
        const TFunction *function = node->getFunction();
        if (mReplacedFunctions.count(function) == 0)
        {
            return;
        }

        // atomic_uint arguments to this call are staged to be replaced at the same time.
        TFunction *substituteFunction = mReplacedFunctions[function];
        TIntermSequence *substituteArguments = new TIntermSequence;

        for (size_t paramIndex = 0; paramIndex < function->getParamCount(); ++paramIndex)
        {
            TIntermNode *param = node->getChildNode(paramIndex);

            TIntermNode *replacement = nullptr;
            if (param->getAsTyped()->getType().isAtomicCounter())
            {
                ASSERT(mAtomicCounterFunctionCallArgs.count(param) != 0);
                replacement = mAtomicCounterFunctionCallArgs[param];
            }
            else
            {
                replacement = param->getAsTyped()->deepCopy();
            }
            substituteArguments->push_back(replacement);
        }

        TIntermTyped *substituteCall =
            TIntermAggregate::CreateFunctionCall(*substituteFunction, substituteArguments);

        queueReplacement(substituteCall, OriginalNode::IS_DROPPED);
    }

  private:
    const TVariable *mAtomicCounters;

    // A map from the atomic_uint variable to the offset declaration.
    std::unordered_map<const TVariable *, TVariable *> mAtomicCounterOffsets;
    // A map from functions with atomic_uint parameters to one where that's replaced with uint.
    std::unordered_map<const TFunction *, TFunction *> mReplacedFunctions;
    // A map from atomic_uint function parameters to their replacement uint parameter for the
    // current function definition.
    std::unordered_map<const TVariable *, TVariable *> mAtomicCounterFunctionParams;
    // A map from atomic_uint function call arguments to their replacement for the current
    // non-builtin function call.
    std::unordered_map<const TIntermNode *, TIntermTyped *> mAtomicCounterFunctionCallArgs;

    // State for the atomic counter declaration currently being visited; set in visitDeclaration
    // and consumed by visitSymbol (via declareAtomicCounter).
    uint32_t mCurrentAtomicCounterOffset;
    TIntermDeclaration *mCurrentAtomicCounterDecl;
    TIntermAggregateBase *mCurrentAtomicCounterDeclParent;
};
} // anonymous namespace
// Entry point: declares the backing storage buffer, then rewrites every atomic counter
// declaration, reference, and builtin call in the tree rooted at |root|.
void RewriteAtomicCounters(TIntermBlock *root, TSymbolTable *symbolTable)
{
    const TVariable *countersBlockVar = DeclareAtomicCountersBuffer(root, symbolTable);

    RewriteAtomicCountersTraverser rewriter(symbolTable, countersBlockVar);
    root->traverse(&rewriter);
    rewriter.updateTree();
}
} // namespace sh
//
// Copyright 2019 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RewriteAtomicCounters: Change atomic counter buffers to storage buffers, with atomic counter
// variables being offsets into the uint array of that storage buffer.
#ifndef COMPILER_TRANSLATOR_TREEOPS_REWRITEATOMICCOUNTERS_H_
#define COMPILER_TRANSLATOR_TREEOPS_REWRITEATOMICCOUNTERS_H_

namespace sh
{
// Forward declarations, to avoid pulling in the full AST headers.
class TIntermBlock;
class TSymbolTable;
class TVariable;

// Rewrites the AST rooted at |root| so atomic counters are emulated via a storage buffer; see
// RewriteAtomicCounters.cpp for the transformation details.
void RewriteAtomicCounters(TIntermBlock *root, TSymbolTable *symbolTable);
}  // namespace sh

#endif  // COMPILER_TRANSLATOR_TREEOPS_REWRITEATOMICCOUNTERS_H_
......@@ -208,6 +208,40 @@ TVariable *DeclareTempVariable(TSymbolTable *symbolTable,
return variable;
}
// Declares an interface block of type |blockTypeName| with instance name |blockVariableName| and
// the fields in |fieldList|, inserting the declaration into |root| before the first function
// definition (so functions preceding main() can reference it).
//
// |qualifier| selects the block kind (e.g. EvqUniform or EvqBuffer) and |memoryQualifier| carries
// memory qualifiers such as coherent.  Returns the variable naming the block instance.
const TVariable *DeclareInterfaceBlock(TIntermBlock *root,
                                       TSymbolTable *symbolTable,
                                       TFieldList *fieldList,
                                       TQualifier qualifier,
                                       const TMemoryQualifier &memoryQualifier,
                                       const char *blockTypeName,
                                       const char *blockVariableName)
{
    // Define an interface block.
    TLayoutQualifier layoutQualifier = TLayoutQualifier::Create();
    TInterfaceBlock *interfaceBlock =
        new TInterfaceBlock(symbolTable, ImmutableString(blockTypeName), fieldList, layoutQualifier,
                            SymbolType::AngleInternal);

    // Turn the interface block into a declaration.
    TType *interfaceBlockType = new TType(interfaceBlock, qualifier, layoutQualifier);
    interfaceBlockType->setMemoryQualifier(memoryQualifier);

    TIntermDeclaration *interfaceBlockDecl = new TIntermDeclaration;
    TVariable *interfaceBlockVar = new TVariable(symbolTable, ImmutableString(blockVariableName),
                                                 interfaceBlockType, SymbolType::AngleInternal);
    TIntermSymbol *interfaceBlockDeclarator = new TIntermSymbol(interfaceBlockVar);
    interfaceBlockDecl->appendDeclarator(interfaceBlockDeclarator);

    // Insert the declarations before the first function.
    TIntermSequence *insertSequence = new TIntermSequence;
    insertSequence->push_back(interfaceBlockDecl);

    size_t firstFunctionIndex = FindFirstFunctionDefinitionIndex(root);
    root->insertChildNodes(firstFunctionIndex, *insertSequence);

    return interfaceBlockVar;
}
TIntermBlock *EnsureBlock(TIntermNode *node)
{
if (node == nullptr)
......
......@@ -10,6 +10,7 @@
#define COMPILER_TRANSLATOR_INTERMNODEUTIL_H_
#include "compiler/translator/IntermNode.h"
#include "compiler/translator/tree_util/FindFunction.h"
namespace sh
{
......@@ -42,6 +43,13 @@ TVariable *DeclareTempVariable(TSymbolTable *symbolTable,
TIntermTyped *initializer,
TQualifier qualifier,
TIntermDeclaration **declarationOut);
const TVariable *DeclareInterfaceBlock(TIntermBlock *root,
TSymbolTable *symbolTable,
TFieldList *fieldList,
TQualifier qualifier,
const TMemoryQualifier &memoryQualifier,
const char *blockTypeName,
const char *blockVariableName);
// If the input node is nullptr, return nullptr.
// If the input node is a block node, return it.
......@@ -65,4 +73,4 @@ TIntermTyped *CreateBuiltInFunctionCallNode(const char *name,
} // namespace sh
#endif // COMPILER_TRANSLATOR_INTERMNODEUTIL_H_
\ No newline at end of file
#endif // COMPILER_TRANSLATOR_INTERMNODEUTIL_H_
......@@ -11,6 +11,8 @@
#include "common/platform.h"
#include <stdint.h>
namespace gl
{
......@@ -72,6 +74,14 @@ enum
// Implementation upper limits, real maximums depend on the hardware.
IMPLEMENTATION_MAX_SHADER_STORAGE_BUFFER_BINDINGS = 64
};
namespace limits
{
// Some of the minimums required by GL, used to detect if the backend meets the minimum requirement.
// Currently, there's no need to separate these values per spec version.

// Minimum number of compute shader storage buffers — presumably mirroring the spec-mandated
// minimum for MAX_COMPUTE_SHADER_STORAGE_BLOCKS; verify against the targeted GL/GLES spec.
constexpr uint32_t kMinimumComputeStorageBuffers = 4;
}  // namespace limits
} // namespace gl
#endif // LIBANGLE_CONSTANTS_H_
......@@ -3377,6 +3377,11 @@ void Context::initCaps()
LimitCap(&mState.mCaps.maxImageUnits, IMPLEMENTATION_MAX_IMAGE_UNITS);
for (ShaderType shaderType : AllShaderTypes())
{
LimitCap(&mState.mCaps.maxShaderAtomicCounterBuffers[shaderType],
IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS);
}
LimitCap(&mState.mCaps.maxCombinedAtomicCounterBuffers,
IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS);
......
......@@ -490,6 +490,8 @@ template <typename T>
using UniformBuffersArray = std::array<T, IMPLEMENTATION_MAX_UNIFORM_BUFFER_BINDINGS>;
template <typename T>
using StorageBuffersArray = std::array<T, IMPLEMENTATION_MAX_SHADER_STORAGE_BUFFER_BINDINGS>;
template <typename T>
using AtomicCounterBuffersArray = std::array<T, IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS>;
using ImageUnitMask = angle::BitSet<IMPLEMENTATION_MAX_IMAGE_UNITS>;
......
......@@ -683,7 +683,8 @@ ANGLE_INLINE angle::Result ContextVk::handleDirtyShaderResourcesImpl(
vk::CommandBuffer *commandBuffer,
vk::CommandGraphResource *recorder)
{
if (mProgram->hasUniformBuffers() || mProgram->hasStorageBuffers())
if (mProgram->hasUniformBuffers() || mProgram->hasStorageBuffers() ||
mProgram->hasAtomicCounterBuffers())
{
ANGLE_TRY(mProgram->updateShaderResourcesDescriptorSet(this, recorder));
}
......@@ -1953,7 +1954,8 @@ void ContextVk::invalidateCurrentTextures()
void ContextVk::invalidateCurrentShaderResources()
{
ASSERT(mProgram);
if (mProgram->hasUniformBuffers() || mProgram->hasStorageBuffers())
if (mProgram->hasUniformBuffers() || mProgram->hasStorageBuffers() ||
mProgram->hasAtomicCounterBuffers())
{
mGraphicsDirtyBits.set(DIRTY_BIT_SHADER_RESOURCES);
mGraphicsDirtyBits.set(DIRTY_BIT_DESCRIPTOR_SETS);
......
......@@ -735,6 +735,30 @@ uint32_t AssignInterfaceBlockBindings(const std::vector<gl::InterfaceBlock> &blo
return bindingIndex;
}
// Assigns a descriptor set/binding decoration to the atomic counter storage block in every
// shader stage it is active in.  Bindings are allocated sequentially starting at |bindingStart|;
// the first unassigned binding index is returned.
uint32_t AssignAtomicCounterBufferBindings(const std::vector<gl::AtomicCounterBuffer> &buffers,
                                           const char *qualifier,
                                           uint32_t bindingStart,
                                           gl::ShaderMap<IntermediateShaderSource> *shaderSources)
{
    // Currently, we only support a single atomic counter buffer binding.
    ASSERT(buffers.size() <= 1);

    const std::string resourcesDescriptorSet = "set = " + Str(kShaderResourceDescriptorSetIndex);
    constexpr char kAtomicCounterBlockName[] = "ANGLEAtomicCounters";

    uint32_t bindingIndex = bindingStart;
    for (const gl::AtomicCounterBuffer &buffer : buffers)
    {
        const std::string bindingString =
            resourcesDescriptorSet + ", binding = " + Str(bindingIndex++);

        AssignResourceBinding(buffer.activeShaders(), kAtomicCounterBlockName, bindingString,
                              qualifier, kUnusedBlockSubstitution, shaderSources);
    }

    return bindingIndex;
}
void AssignBufferBindings(const gl::ProgramState &programState,
gl::ShaderMap<IntermediateShaderSource> *shaderSources)
{
......@@ -745,10 +769,13 @@ void AssignBufferBindings(const gl::ProgramState &programState,
AssignInterfaceBlockBindings(uniformBlocks, kUniformQualifier, bindingStart, shaderSources);
const std::vector<gl::InterfaceBlock> &storageBlocks = programState.getShaderStorageBlocks();
// Note: this pattern of accumulating the bindingStart and assigning the next
// resource will be used to append atomic counter buffers and images to this set.
bindingStart =
AssignInterfaceBlockBindings(storageBlocks, kSSBOQualifier, bindingStart, shaderSources);
const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers =
programState.getAtomicCounterBuffers();
bindingStart = AssignAtomicCounterBufferBindings(atomicCounterBuffers, kSSBOQualifier,
bindingStart, shaderSources);
}
void AssignTextureBindings(const gl::ProgramState &programState,
......
......@@ -182,6 +182,71 @@ void AddInterfaceBlockDescriptorSetDesc(const std::vector<gl::InterfaceBlock> &b
}
}
// Adds one storage-buffer descriptor binding to |descOut| for each atomic counter buffer,
// starting at binding index |bindingStart|.  Each binding is flagged with the shader stages the
// corresponding buffer is active in.
void AddAtomicCounterBufferDescriptorSetDesc(
    const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers,
    uint32_t bindingStart,
    vk::DescriptorSetLayoutDesc *descOut)
{
    for (uint32_t bufferIndex = 0; bufferIndex < atomicCounterBuffers.size(); ++bufferIndex)
    {
        VkShaderStageFlags activeStages =
            gl_vk::GetShaderStageFlags(atomicCounterBuffers[bufferIndex].activeShaders());

        // Fix: advance the binding index per buffer.  The original kept a separate
        // |bindingIndex| that was initialized to 0 and never incremented, so with more than one
        // buffer every descriptor would overwrite binding |bindingStart|.  (Behavior is unchanged
        // for the currently supported case of at most one buffer.)
        descOut->update(bindingStart + bufferIndex, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
                        activeStages);
    }
}
// Fills in a VkDescriptorBufferInfo and its matching VkWriteDescriptorSet for one
// buffer binding point.  |maxSize| > 0 clamps the descriptor range (used for UBOs,
// whose size is fixed by the shader); pass 0 for SSBO-style bindings, whose arrays
// may be variably sized.
void WriteBufferDescriptorSetBinding(const gl::OffsetBindingPointer<gl::Buffer> &bufferBinding,
VkDeviceSize maxSize,
VkDescriptorSet descSet,
VkDescriptorType descType,
uint32_t bindingIndex,
uint32_t arrayElement,
VkDescriptorBufferInfo *bufferInfoOut,
VkWriteDescriptorSet *writeInfoOut)
{
// Make sure there's no possible under/overflow with binding size.
static_assert(sizeof(VkDeviceSize) >= sizeof(bufferBinding.getSize()),
"VkDeviceSize too small");

gl::Buffer *glBuffer = bufferBinding.get();
ASSERT(glBuffer != nullptr);
ASSERT(bufferBinding.getSize() >= 0);

vk::BufferHelper &bufferHelper = vk::GetImpl(glBuffer)->getBuffer();
const GLintptr bufferOffset = bufferBinding.getOffset();
VkDeviceSize bindingRange = bufferBinding.getSize();

// If size is 0, we can't always use VK_WHOLE_SIZE (or bufferHelper.getSize()), as the
// backing buffer may be larger than max*BufferRange.  In that case, we use the minimum of
// the backing buffer size (what's left after offset) and the buffer size as defined by the
// shader.  That latter is only valid for UBOs, as SSBOs may have variable length arrays.
if (bindingRange == 0)
{
bindingRange = bufferHelper.getSize() - bufferOffset;
}
if (maxSize > 0)
{
bindingRange = std::min(bindingRange, maxSize);
}

bufferInfoOut->buffer = bufferHelper.getBuffer().getHandle();
bufferInfoOut->offset = bufferOffset;
bufferInfoOut->range  = bindingRange;

writeInfoOut->sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeInfoOut->pNext            = nullptr;
writeInfoOut->dstSet           = descSet;
writeInfoOut->dstBinding       = bindingIndex;
writeInfoOut->dstArrayElement  = arrayElement;
writeInfoOut->descriptorCount  = 1;
writeInfoOut->descriptorType   = descType;
writeInfoOut->pImageInfo       = nullptr;
writeInfoOut->pBufferInfo      = bufferInfoOut;
writeInfoOut->pTexelBufferView = nullptr;
ASSERT(writeInfoOut->pBufferInfo[0].buffer != VK_NULL_HANDLE);
}
class Std140BlockLayoutEncoderFactory : public gl::CustomBlockLayoutEncoderFactory
{
public:
......@@ -255,7 +320,10 @@ ProgramVk::DefaultUniformBlock::DefaultUniformBlock() {}
ProgramVk::DefaultUniformBlock::~DefaultUniformBlock() = default;
ProgramVk::ProgramVk(const gl::ProgramState &state)
: ProgramImpl(state), mDynamicBufferOffsets{}, mStorageBlockBindingsOffset(0)
: ProgramImpl(state),
mDynamicBufferOffsets{},
mStorageBlockBindingsOffset(0),
mAtomicCounterBufferBindingsOffset(0)
{}
ProgramVk::~ProgramVk() = default;
......@@ -443,6 +511,8 @@ angle::Result ProgramVk::linkImpl(const gl::Context *glContext, gl::InfoLog &inf
AddInterfaceBlockDescriptorSetDesc(mState.getShaderStorageBlocks(),
getStorageBlockBindingsOffset(),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffersSetDesc);
AddAtomicCounterBufferDescriptorSetDesc(
mState.getAtomicCounterBuffers(), getAtomicCounterBufferBindingsOffset(), &buffersSetDesc);
ANGLE_TRY(renderer->getDescriptorSetLayout(
contextVk, buffersSetDesc, &mDescriptorSetLayouts[kShaderResourceDescriptorSetIndex]));
......@@ -494,6 +564,8 @@ angle::Result ProgramVk::linkImpl(const gl::Context *glContext, gl::InfoLog &inf
uint32_t uniformBlockCount = static_cast<uint32_t>(mState.getUniformBlocks().size());
uint32_t storageBlockCount = static_cast<uint32_t>(mState.getShaderStorageBlocks().size());
uint32_t atomicCounterBufferCount =
static_cast<uint32_t>(mState.getAtomicCounterBuffers().size());
uint32_t textureCount = static_cast<uint32_t>(mState.getSamplerBindings().size());
if (renderer->getFeatures().bindEmptyForUnusedDescriptorSets.enabled)
......@@ -509,9 +581,10 @@ angle::Result ProgramVk::linkImpl(const gl::Context *glContext, gl::InfoLog &inf
{
bufferSetSize.push_back({VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, uniformBlockCount});
}
if (storageBlockCount > 0)
if (storageBlockCount > 0 || atomicCounterBufferCount > 0)
{
bufferSetSize.push_back({VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, storageBlockCount});
bufferSetSize.push_back(
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, storageBlockCount + atomicCounterBufferCount});
}
VkDescriptorPoolSize textureSetSize = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, textureCount};
......@@ -537,6 +610,8 @@ angle::Result ProgramVk::linkImpl(const gl::Context *glContext, gl::InfoLog &inf
// Computes where each buffer category starts within the shader-resource descriptor set:
// uniform buffers come first, then storage buffers, then atomic counter buffers.
void ProgramVk::updateBindingOffsets()
{
// Storage buffer bindings begin immediately after the uniform buffer bindings.
mStorageBlockBindingsOffset = mState.getUniqueUniformBlockCount();
// Atomic counter buffer bindings begin immediately after the storage buffer bindings.
mAtomicCounterBufferBindingsOffset =
mStorageBlockBindingsOffset + mState.getUniqueStorageBlockCount();
}
void ProgramVk::linkResources(const gl::ProgramLinkedResources &resources)
......@@ -1130,18 +1205,17 @@ void ProgramVk::updateBuffersDescriptorSet(ContextVk *contextVk,
continue;
}
gl::Buffer *buffer = bufferBinding.get();
ASSERT(buffer != nullptr);
uint32_t binding = bindingStart + currentBinding;
uint32_t arrayElement = block.isArray ? block.arrayElement : 0;
VkDeviceSize maxBlockSize = isStorageBuffer ? 0 : block.dataSize;
VkDescriptorBufferInfo &bufferInfo = descriptorBufferInfo[writeCount];
VkWriteDescriptorSet &writeInfo = writeDescriptorInfo[writeCount];
// Make sure there's no possible under/overflow with binding size.
static_assert(sizeof(VkDeviceSize) >= sizeof(bufferBinding.getSize()),
"VkDeviceSize too small");
ASSERT(bufferBinding.getSize() >= 0);
WriteBufferDescriptorSetBinding(bufferBinding, maxBlockSize, descriptorSet, descriptorType,
binding, arrayElement, &bufferInfo, &writeInfo);
BufferVk *bufferVk = vk::GetImpl(buffer);
GLintptr offset = bufferBinding.getOffset();
VkDeviceSize size = bufferBinding.getSize();
VkDeviceSize blockSize = block.dataSize;
BufferVk *bufferVk = vk::GetImpl(bufferBinding.get());
vk::BufferHelper &bufferHelper = bufferVk->getBuffer();
if (isStorageBuffer)
......@@ -1154,35 +1228,55 @@ void ProgramVk::updateBuffersDescriptorSet(ContextVk *contextVk,
bufferHelper.onRead(recorder, VK_ACCESS_UNIFORM_READ_BIT);
}
// If size is 0, we can't always use VK_WHOLE_SIZE (or bufferHelper.getSize()), as the
// backing buffer may be larger than max*BufferRange. In that case, we use the minimum of
// the backing buffer size (what's left after offset) and the buffer size as defined by the
// shader. That latter is only valid for UBOs, as SSBOs may have variable length arrays.
size = size > 0 ? size : (bufferHelper.getSize() - offset);
if (!isStorageBuffer)
++writeCount;
}
VkDevice device = contextVk->getDevice();
vkUpdateDescriptorSets(device, writeCount, writeDescriptorInfo.data(), 0, nullptr);
}
void ProgramVk::updateAtomicCounterBuffersDescriptorSet(ContextVk *contextVk,
vk::CommandGraphResource *recorder)
{
VkDescriptorSet descriptorSet = mDescriptorSets[kShaderResourceDescriptorSetIndex];
const uint32_t bindingStart = getAtomicCounterBufferBindingsOffset();
gl::AtomicCounterBuffersArray<VkDescriptorBufferInfo> descriptorBufferInfo;
gl::AtomicCounterBuffersArray<VkWriteDescriptorSet> writeDescriptorInfo;
uint32_t writeCount = 0;
// Write atomic counter buffers.
const gl::State &glState = contextVk->getState();
const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers =
mState.getAtomicCounterBuffers();
for (uint32_t bufferIndex = 0; bufferIndex < atomicCounterBuffers.size(); ++bufferIndex)
{
const gl::AtomicCounterBuffer &atomicCounterBuffer = atomicCounterBuffers[bufferIndex];
const gl::OffsetBindingPointer<gl::Buffer> &bufferBinding =
glState.getIndexedAtomicCounterBuffer(atomicCounterBuffer.binding);
if (bufferBinding.get() == nullptr)
{
size = std::min(size, blockSize);
continue;
}
uint32_t binding = bindingStart + bufferIndex;
VkDescriptorBufferInfo &bufferInfo = descriptorBufferInfo[writeCount];
VkWriteDescriptorSet &writeInfo = writeDescriptorInfo[writeCount];
bufferInfo.buffer = bufferHelper.getBuffer().getHandle();
bufferInfo.offset = offset;
bufferInfo.range = size;
WriteBufferDescriptorSetBinding(bufferBinding, 0, descriptorSet,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, binding, 0, &bufferInfo,
&writeInfo);
VkWriteDescriptorSet &writeInfo = writeDescriptorInfo[writeCount];
BufferVk *bufferVk = vk::GetImpl(bufferBinding.get());
vk::BufferHelper &bufferHelper = bufferVk->getBuffer();
writeInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeInfo.pNext = nullptr;
writeInfo.dstSet = descriptorSet;
writeInfo.dstBinding = bindingStart + currentBinding;
writeInfo.dstArrayElement = block.isArray ? block.arrayElement : 0;
writeInfo.descriptorCount = 1;
writeInfo.descriptorType = descriptorType;
writeInfo.pImageInfo = nullptr;
writeInfo.pBufferInfo = &bufferInfo;
writeInfo.pTexelBufferView = nullptr;
ASSERT(writeInfo.pBufferInfo[0].buffer != VK_NULL_HANDLE);
bufferHelper.onWrite(contextVk, recorder, VK_ACCESS_SHADER_READ_BIT,
VK_ACCESS_SHADER_WRITE_BIT);
++writeCount;
}
......@@ -1201,6 +1295,7 @@ angle::Result ProgramVk::updateShaderResourcesDescriptorSet(ContextVk *contextVk
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
updateBuffersDescriptorSet(contextVk, recorder, mState.getShaderStorageBlocks(),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
updateAtomicCounterBuffersDescriptorSet(contextVk, recorder);
return angle::Result::Continue;
}
......
......@@ -122,6 +122,7 @@ class ProgramVk : public ProgramImpl
// Convenience queries for whether the linked program uses each resource category,
// based on the cached gl::ProgramState.
bool hasTextures() const { return !mState.getSamplerBindings().empty(); }
bool hasUniformBuffers() const { return !mState.getUniformBlocks().empty(); }
bool hasStorageBuffers() const { return !mState.getShaderStorageBlocks().empty(); }
bool hasAtomicCounterBuffers() const { return !mState.getAtomicCounterBuffers().empty(); }
bool hasTransformFeedbackOutput() const
{
return !mState.getLinkedTransformFeedbackVaryings().empty();
......@@ -185,6 +186,8 @@ class ProgramVk : public ProgramImpl
vk::CommandGraphResource *recorder,
const std::vector<gl::InterfaceBlock> &blocks,
VkDescriptorType descriptorType);
void updateAtomicCounterBuffersDescriptorSet(ContextVk *contextVk,
vk::CommandGraphResource *recorder);
template <class T>
void getUniformImpl(GLint location, T *v, GLenum entryPointType) const;
......@@ -197,6 +200,10 @@ class ProgramVk : public ProgramImpl
void updateBindingOffsets();
// Uniform buffer bindings start at the beginning of the buffer descriptor set.
uint32_t getUniformBlockBindingsOffset() const { return 0; }
// Storage buffer bindings follow the uniform buffer bindings (cached by
// updateBindingOffsets()).
uint32_t getStorageBlockBindingsOffset() const { return mStorageBlockBindingsOffset; }
// Atomic counter buffer bindings follow the storage buffer bindings.
uint32_t getAtomicCounterBufferBindingsOffset() const
{
return mAtomicCounterBufferBindingsOffset;
}
class ShaderInfo;
ANGLE_INLINE angle::Result initShaders(ContextVk *contextVk,
......@@ -309,9 +316,11 @@ class ProgramVk : public ProgramImpl
// We keep the translated linked shader sources to use with shader draw call patching.
gl::ShaderMap<std::string> mShaderSources;
// Storage buffers are placed after uniform buffers in their descriptor set. This cached value
// contains the offset where storage buffer bindings start.
// In their descriptor set, uniform buffers are placed first, then storage buffers, then atomic
// counter buffers. These cached values contain the offsets where storage buffer and atomic
// counter buffer bindings start.
uint32_t mStorageBlockBindingsOffset;
uint32_t mAtomicCounterBufferBindingsOffset;
// Store descriptor pools here. We store the descriptors in the Program to facilitate descriptor
// cache management. It can also allow fewer descriptors for shaders which use fewer
......
......@@ -1112,6 +1112,19 @@ gl::Version RendererVk::getMaxSupportedESVersion() const
// Current highest supported version
gl::Version maxVersion = gl::Version(3, 1);
// Limit to ES3.0 if there are any blockers for 3.1.
// ES3.1 requires at least one atomic counter buffer and four storage buffers in compute.
// Atomic counter buffers are emulated with storage buffers, so if Vulkan doesn't support at
// least 5 storage buffers in compute, we cannot support 3.1.
if (mPhysicalDeviceProperties.limits.maxPerStageDescriptorStorageBuffers <
gl::limits::kMinimumComputeStorageBuffers + 1)
{
maxVersion = std::min(maxVersion, gl::Version(3, 0));
}
// Limit to ES2.0 if there are any blockers for 3.0.
// If the command buffer doesn't support queries, we can't support ES3.
if (!vk::CommandBuffer::SupportsQueries(mPhysicalDeviceFeatures))
{
......
......@@ -256,6 +256,31 @@ void RendererVk::ensureCapsInitialized() const
maxPerStageStorageBuffers = std::min(maxPerStageStorageBuffers, maxCombinedStorageBuffers);
}
// Reserve one storage buffer in the fragment and compute stages for atomic counters. This is
// only possible if the number of per-stage storage buffers is greater than 4, which is the
// required GLES minimum for compute. We use the same value for fragment, to avoid giving one
// of the precious few storage buffers available to an atomic counter buffer. The spec allows
// there to be zero of either of these resources in the fragment stage.
uint32_t maxVertexStageAtomicCounterBuffers = 0;
uint32_t maxPerStageAtomicCounterBuffers = 0;
uint32_t maxCombinedAtomicCounterBuffers = 0;
if (maxPerStageStorageBuffers > gl::limits::kMinimumComputeStorageBuffers)
{
--maxPerStageStorageBuffers;
--maxCombinedStorageBuffers;
maxPerStageAtomicCounterBuffers = 1;
maxCombinedAtomicCounterBuffers = 1;
}
// For the vertex stage, similarly reserve one storage buffer for atomic counters, if there are
// excess storage buffers.
if (maxVertexStageStorageBuffers > gl::limits::kMinimumComputeStorageBuffers)
{
--maxVertexStageStorageBuffers;
maxVertexStageAtomicCounterBuffers = 1;
}
mNativeCaps.maxShaderStorageBlocks[gl::ShaderType::Vertex] =
mPhysicalDeviceFeatures.vertexPipelineStoresAndAtomics ? maxVertexStageStorageBuffers : 0;
mNativeCaps.maxShaderStorageBlocks[gl::ShaderType::Fragment] =
......@@ -268,6 +293,31 @@ void RendererVk::ensureCapsInitialized() const
mNativeCaps.shaderStorageBufferOffsetAlignment =
static_cast<GLuint>(limitsVk.minStorageBufferOffsetAlignment);
mNativeCaps.maxShaderAtomicCounterBuffers[gl::ShaderType::Vertex] =
mPhysicalDeviceFeatures.vertexPipelineStoresAndAtomics ? maxVertexStageAtomicCounterBuffers
: 0;
mNativeCaps.maxShaderAtomicCounterBuffers[gl::ShaderType::Fragment] =
mPhysicalDeviceFeatures.fragmentStoresAndAtomics ? maxPerStageAtomicCounterBuffers : 0;
mNativeCaps.maxShaderAtomicCounterBuffers[gl::ShaderType::Compute] =
maxPerStageAtomicCounterBuffers;
mNativeCaps.maxCombinedAtomicCounterBuffers = maxCombinedAtomicCounterBuffers;
mNativeCaps.maxAtomicCounterBufferBindings = maxCombinedAtomicCounterBuffers;
// Emulated as storage buffers, atomic counter buffers have the same size limit. However, the
// limit is a signed integer and values above int max will end up as a negative size.
mNativeCaps.maxAtomicCounterBufferSize =
std::min<uint32_t>(std::numeric_limits<int32_t>::max(), limitsVk.maxStorageBufferRange);
// There is no particular limit to how many atomic counters there can be, other than the size of
// a storage buffer. We nevertheless limit this to something sane (4096 arbitrarily).
const uint32_t maxAtomicCounters =
std::min<size_t>(4096, limitsVk.maxStorageBufferRange / sizeof(uint32_t));
for (gl::ShaderType shaderType : gl::AllShaderTypes())
{
mNativeCaps.maxShaderAtomicCounters[shaderType] = maxAtomicCounters;
}
mNativeCaps.maxCombinedAtomicCounters = maxAtomicCounters;
mNativeCaps.minProgramTexelOffset = mPhysicalDeviceProperties.limits.minTexelOffset;
mNativeCaps.maxProgramTexelOffset = mPhysicalDeviceProperties.limits.maxTexelOffset;
......
......@@ -672,9 +672,10 @@
// Tessellation geometry interaction:
3572 VULKAN : dEQP-GLES31.functional.tessellation_geometry_interaction.* = FAIL
// Atomic counters:
3566 VULKAN : dEQP-GLES31.functional.*atomic_counter* = FAIL
3566 VULKAN : dEQP-GLES31.functional.ssbo.layout.* = FAIL
// dEQP bug where atomic counter bindings > max result in failure instead of not-supported:
// https://github.com/KhronosGroup/VK-GL-CTS/issues/156
3566 VULKAN : dEQP-GLES31.functional.atomic_counter.* = FAIL
3566 VULKAN : dEQP-GLES31.functional.synchronization.inter_call.without_memory_barrier.*atomic_counter* = FAIL
// Storage image:
3563 VULKAN : dEQP-GLES31.functional.state_query.*image* = FAIL
......@@ -731,6 +732,9 @@
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_shared_buffer.39 = FAIL
3443 VULKAN : dEQP-GLES31.functional.ubo.random.all_shared_buffer.45 = FAIL
// Inactive SSBOs with flexible array member (about 20% of these tests are affected):
3714 VULKAN : dEQP-GLES31.functional.ssbo.layout.random.* = FAIL
// Validation errors:
// Optimal tiling not supported for format:
3520 VULKAN : dEQP-GLES31.functional.state_query.texture_level.texture* = SKIP
......
......@@ -55,9 +55,9 @@
// Base texture level:
3184 VULKAN : KHR-GLES31.core.texture*base-level* = SKIP
// Atomic counters:
3566 VULKAN : KHR-GLES31.core.shader_atomic_counters.* = FAIL
3566 VULKAN : KHR-GLES31.core.layout_binding.buffer_layout_binding_atomic* = SKIP
// Program Pipeline Objects:
3570 VULKAN : KHR-GLES31.core.shader_atomic_counters.advanced-usage-many-draw-calls2 = FAIL
3570 VULKAN : KHR-GLES31.core.shader_atomic_counters.advanced-usage-many-dispatches = FAIL
// RGBA32F:
3520 VULKAN : KHR-GLES31.core.texture_gather.*-gather-float-2d-rgb = SKIP
......@@ -107,6 +107,8 @@
// Storage image:
3563 VULKAN : KHR-GLES31.core.layout_binding.sampler2D_layout_binding_texture_ComputeShader = FAIL
3563 VULKAN : KHR-GLES31.core.layout_binding.block_layout_binding_block_ComputeShader = FAIL
3563 VULKAN : KHR-GLES31.core.layout_binding.buffer_layout_binding_atomic* = SKIP
3563 VULKAN : KHR-GLES31.core.layout_binding.atomic_uint_layout_binding_atomic* = FAIL
3520 VULKAN : KHR-GLES31.core.internalformat.copy_tex_image* = FAIL
3520 VULKAN : KHR-GLES31.core.internalformat.renderbuffer* = FAIL
......
......@@ -123,6 +123,8 @@ TEST_P(AtomicCounterBufferTest31, AtomicCounterReadCompute)
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
layout(binding = 0, offset = 8) uniform atomic_uint ac[3];
void atomicCounterInFunction(in atomic_uint counter[3]);
void atomicCounterInFunction(in atomic_uint counter[3])
{
atomicCounter(counter[0]);
......@@ -272,7 +274,12 @@ ANGLE_INSTANTIATE_TEST(AtomicCounterBufferTest,
ES3_OPENGLES(),
ES31_OPENGL(),
ES31_OPENGLES(),
ES31_D3D11());
ANGLE_INSTANTIATE_TEST(AtomicCounterBufferTest31, ES31_OPENGL(), ES31_OPENGLES(), ES31_D3D11());
ES31_D3D11(),
ES31_VULKAN());
ANGLE_INSTANTIATE_TEST(AtomicCounterBufferTest31,
ES31_OPENGL(),
ES31_OPENGLES(),
ES31_D3D11(),
ES31_VULKAN());
} // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment