Commit 44e690ca by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Support unaligned atomic counter buffer binding

GLES does not impose an implementation-defined alignment requirement on atomic counter buffer bindings. They are emulated with Vulkan storage buffers, which do have offset alignment restrictions. The storage buffers are therefore bound at aligned offsets, and the remainder of each offset is passed to the shader as a uniform value. This means that the driver uniforms are now also bound to the compute pipeline.

Bug: angleproject:3566
Change-Id: I1a3429438f76d95e33cb5c6ef2c9370a10d900d6
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1713095
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
parent 6201d134
...@@ -172,12 +172,17 @@ constexpr const char kViewportYScale[] = "viewportYScale"; ...@@ -172,12 +172,17 @@ constexpr const char kViewportYScale[] = "viewportYScale";
constexpr const char kNegViewportYScale[] = "negViewportYScale"; constexpr const char kNegViewportYScale[] = "negViewportYScale";
constexpr const char kXfbActiveUnpaused[] = "xfbActiveUnpaused"; constexpr const char kXfbActiveUnpaused[] = "xfbActiveUnpaused";
constexpr const char kXfbBufferOffsets[] = "xfbBufferOffsets"; constexpr const char kXfbBufferOffsets[] = "xfbBufferOffsets";
constexpr const char kAcbBufferOffsets[] = "acbBufferOffsets";
constexpr const char kDepthRange[] = "depthRange"; constexpr const char kDepthRange[] = "depthRange";
constexpr size_t kNumDriverUniforms = 7; constexpr size_t kNumGraphicsDriverUniforms = 8;
constexpr std::array<const char *, kNumDriverUniforms> kDriverUniformNames = { constexpr std::array<const char *, kNumGraphicsDriverUniforms> kGraphicsDriverUniformNames = {
{kViewport, kHalfRenderAreaHeight, kViewportYScale, kNegViewportYScale, kXfbActiveUnpaused, {kViewport, kHalfRenderAreaHeight, kViewportYScale, kNegViewportYScale, kXfbActiveUnpaused,
kXfbBufferOffsets, kDepthRange}}; kXfbBufferOffsets, kAcbBufferOffsets, kDepthRange}};
// Field names of the driver uniform block declared for compute shaders. The entries are consumed
// by index in AddComputeDriverUniformsToShader, so their order must match that function's
// kDriverUniformTypes array.
constexpr size_t kNumComputeDriverUniforms = 1;
constexpr std::array<const char *, kNumComputeDriverUniforms> kComputeDriverUniformNames = {
{kAcbBufferOffsets}};
size_t FindFieldIndex(const TFieldList &fieldList, const char *fieldName) size_t FindFieldIndex(const TFieldList &fieldList, const char *fieldName)
{ {
...@@ -324,10 +329,12 @@ void AppendVertexShaderTransformFeedbackOutputToMain(TIntermBlock *root, TSymbol ...@@ -324,10 +329,12 @@ void AppendVertexShaderTransformFeedbackOutputToMain(TIntermBlock *root, TSymbol
RunAtTheEndOfShader(root, new TIntermSymbol(xfbPlaceholder), symbolTable); RunAtTheEndOfShader(root, new TIntermSymbol(xfbPlaceholder), symbolTable);
} }
// The AddDriverUniformsToShader operation adds an internal uniform block to a shader. The driver // The Add*DriverUniformsToShader operation adds an internal uniform block to a shader. The driver
// block is used to implement Vulkan-specific features and workarounds. Returns the driver uniforms // block is used to implement Vulkan-specific features and workarounds. Returns the driver uniforms
// variable. // variable.
const TVariable *AddDriverUniformsToShader(TIntermBlock *root, TSymbolTable *symbolTable) //
// There are Graphics and Compute variations as they require different uniforms.
const TVariable *AddGraphicsDriverUniformsToShader(TIntermBlock *root, TSymbolTable *symbolTable)
{ {
// Init the depth range type. // Init the depth range type.
TFieldList *depthRangeParamsFields = new TFieldList(); TFieldList *depthRangeParamsFields = new TFieldList();
...@@ -354,24 +361,50 @@ const TVariable *AddDriverUniformsToShader(TIntermBlock *root, TSymbolTable *sym ...@@ -354,24 +361,50 @@ const TVariable *AddDriverUniformsToShader(TIntermBlock *root, TSymbolTable *sym
DeclareGlobalVariable(root, depthRangeVar); DeclareGlobalVariable(root, depthRangeVar);
// This field list mirrors the structure of ContextVk::DriverUniforms. // This field list mirrors the structure of GraphicsDriverUniforms in ContextVk.cpp.
TFieldList *driverFieldList = new TFieldList; TFieldList *driverFieldList = new TFieldList;
const std::array<TType *, kNumDriverUniforms> kDriverUniformTypes = {{ const std::array<TType *, kNumGraphicsDriverUniforms> kDriverUniformTypes = {{
new TType(EbtFloat, 4), new TType(EbtFloat, 4),
new TType(EbtFloat), new TType(EbtFloat),
new TType(EbtFloat), new TType(EbtFloat),
new TType(EbtFloat), new TType(EbtFloat),
new TType(EbtUInt), new TType(EbtUInt),
new TType(EbtInt, 4), new TType(EbtInt, 4),
new TType(EbtUInt, 4),
emulatedDepthRangeType, emulatedDepthRangeType,
}}; }};
for (size_t uniformIndex = 0; uniformIndex < kNumDriverUniforms; ++uniformIndex) for (size_t uniformIndex = 0; uniformIndex < kNumGraphicsDriverUniforms; ++uniformIndex)
{
TField *driverUniformField =
new TField(kDriverUniformTypes[uniformIndex],
ImmutableString(kGraphicsDriverUniformNames[uniformIndex]), TSourceLoc(),
SymbolType::AngleInternal);
driverFieldList->push_back(driverUniformField);
}
// Define a driver uniform block "ANGLEUniformBlock" with instance name "ANGLEUniforms".
return DeclareInterfaceBlock(root, symbolTable, driverFieldList, EvqUniform,
TMemoryQualifier::Create(), 0, kUniformsBlockName,
kUniformsVarName);
}
const TVariable *AddComputeDriverUniformsToShader(TIntermBlock *root, TSymbolTable *symbolTable)
{
// This field list mirrors the structure of ComputeDriverUniforms in ContextVk.cpp.
TFieldList *driverFieldList = new TFieldList;
const std::array<TType *, kNumComputeDriverUniforms> kDriverUniformTypes = {{
new TType(EbtUInt, 4),
}};
for (size_t uniformIndex = 0; uniformIndex < kNumComputeDriverUniforms; ++uniformIndex)
{ {
TField *driverUniformField = new TField(kDriverUniformTypes[uniformIndex], TField *driverUniformField =
ImmutableString(kDriverUniformNames[uniformIndex]), new TField(kDriverUniformTypes[uniformIndex],
TSourceLoc(), SymbolType::AngleInternal); ImmutableString(kComputeDriverUniformNames[uniformIndex]), TSourceLoc(),
SymbolType::AngleInternal);
driverFieldList->push_back(driverUniformField); driverFieldList->push_back(driverUniformField);
} }
...@@ -653,7 +686,7 @@ void TranslatorVulkan::translate(TIntermBlock *root, ...@@ -653,7 +686,7 @@ void TranslatorVulkan::translate(TIntermBlock *root,
} }
} }
// TODO(lucferron): Refactor this function to do less tree traversals. // TODO(lucferron): Refactor this function to do fewer tree traversals.
// http://anglebug.com/2461 // http://anglebug.com/2461
if (structTypesUsedForUniforms > 0) if (structTypesUsedForUniforms > 0)
{ {
...@@ -678,15 +711,27 @@ void TranslatorVulkan::translate(TIntermBlock *root, ...@@ -678,15 +711,27 @@ void TranslatorVulkan::translate(TIntermBlock *root,
sink << "};\n"; sink << "};\n";
} }
const TVariable *driverUniforms;
if (getShaderType() == GL_COMPUTE_SHADER)
{
driverUniforms = AddComputeDriverUniformsToShader(root, &getSymbolTable());
}
else
{
driverUniforms = AddGraphicsDriverUniformsToShader(root, &getSymbolTable());
}
if (atomicCounterCount > 0) if (atomicCounterCount > 0)
{ {
RewriteAtomicCounters(root, &getSymbolTable()); // ANGLEUniforms.acbBufferOffsets
const TIntermBinary *acbBufferOffsets =
CreateDriverUniformRef(driverUniforms, kAcbBufferOffsets);
RewriteAtomicCounters(root, &getSymbolTable(), acbBufferOffsets);
} }
const TVariable *driverUniforms = nullptr;
if (getShaderType() != GL_COMPUTE_SHADER) if (getShaderType() != GL_COMPUTE_SHADER)
{ {
driverUniforms = AddDriverUniformsToShader(root, &getSymbolTable());
ReplaceGLDepthRangeWithDriverUniform(root, driverUniforms, &getSymbolTable()); ReplaceGLDepthRangeWithDriverUniform(root, driverUniforms, &getSymbolTable());
} }
......
...@@ -72,7 +72,9 @@ TIntermTyped *CreateAtomicCounterConstant(TType *atomicCounterType, ...@@ -72,7 +72,9 @@ TIntermTyped *CreateAtomicCounterConstant(TType *atomicCounterType,
return TIntermAggregate::CreateConstructor(*atomicCounterType, arguments); return TIntermAggregate::CreateConstructor(*atomicCounterType, arguments);
} }
TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTyped *bindingOffset) TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters,
const TIntermTyped *bindingOffset,
const TIntermTyped *bufferOffsets)
{ {
// The atomic counters storage buffer declaration looks as such: // The atomic counters storage buffer declaration looks as such:
// //
...@@ -87,6 +89,8 @@ TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTy ...@@ -87,6 +89,8 @@ TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTy
// return: // return:
// //
// atomicCounters[binding].counters[offset] // atomicCounters[binding].counters[offset]
//
// The offset itself is the provided one plus an offset given through uniforms.
TIntermSymbol *atomicCountersRef = new TIntermSymbol(atomicCounters); TIntermSymbol *atomicCountersRef = new TIntermSymbol(atomicCounters);
...@@ -96,8 +100,9 @@ TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTy ...@@ -96,8 +100,9 @@ TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTy
// Create references to bindingOffset.binding and bindingOffset.offset. // Create references to bindingOffset.binding and bindingOffset.offset.
TIntermBinary *binding = TIntermBinary *binding =
new TIntermBinary(EOpIndexDirectStruct, bindingOffset, bindingFieldRef); new TIntermBinary(EOpIndexDirectStruct, bindingOffset->deepCopy(), bindingFieldRef);
TIntermBinary *offset = new TIntermBinary(EOpIndexDirectStruct, bindingOffset, offsetFieldRef); TIntermBinary *offset =
new TIntermBinary(EOpIndexDirectStruct, bindingOffset->deepCopy(), offsetFieldRef);
// Create reference to atomicCounters[bindingOffset.binding] // Create reference to atomicCounters[bindingOffset.binding]
TIntermBinary *countersBlock = new TIntermBinary(EOpIndexDirect, atomicCountersRef, binding); TIntermBinary *countersBlock = new TIntermBinary(EOpIndexDirect, atomicCountersRef, binding);
...@@ -106,7 +111,27 @@ TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTy ...@@ -106,7 +111,27 @@ TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTy
TIntermBinary *counters = TIntermBinary *counters =
new TIntermBinary(EOpIndexDirectInterfaceBlock, countersBlock, countersFieldRef); new TIntermBinary(EOpIndexDirectInterfaceBlock, countersBlock, countersFieldRef);
// return atomicCounters[bindingOffset.binding].counters[bindingOffset.offset] // Create bufferOffsets[binding / 4]. Each uint in bufferOffsets contains offsets for 4
// bindings.
TIntermBinary *bindingDivFour =
new TIntermBinary(EOpDiv, binding->deepCopy(), CreateUIntConstant(4));
TIntermBinary *bufferOffsetUint =
new TIntermBinary(EOpIndexDirect, bufferOffsets->deepCopy(), bindingDivFour);
// Create (binding % 4) * 8
TIntermBinary *bindingModFour =
new TIntermBinary(EOpIMod, binding->deepCopy(), CreateUIntConstant(4));
TIntermBinary *bufferOffsetShift =
new TIntermBinary(EOpMul, bindingModFour, CreateUIntConstant(8));
// Create bufferOffsets[binding / 4] >> ((binding % 4) * 8) & 0xFF
TIntermBinary *bufferOffsetShifted =
new TIntermBinary(EOpBitShiftRight, bufferOffsetUint, bufferOffsetShift);
TIntermBinary *bufferOffset =
new TIntermBinary(EOpBitwiseAnd, bufferOffsetShifted, CreateUIntConstant(0xFF));
// return atomicCounters[bindingOffset.binding].counters[bindingOffset.offset + bufferOffset]
offset = new TIntermBinary(EOpAdd, offset, bufferOffset);
return new TIntermBinary(EOpIndexDirect, counters, offset); return new TIntermBinary(EOpIndexDirect, counters, offset);
} }
...@@ -119,9 +144,12 @@ TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTy ...@@ -119,9 +144,12 @@ TIntermBinary *CreateAtomicCounterRef(const TVariable *atomicCounters, TIntermTy
class RewriteAtomicCountersTraverser : public TIntermTraverser class RewriteAtomicCountersTraverser : public TIntermTraverser
{ {
public: public:
RewriteAtomicCountersTraverser(TSymbolTable *symbolTable, const TVariable *atomicCounters) RewriteAtomicCountersTraverser(TSymbolTable *symbolTable,
const TVariable *atomicCounters,
const TIntermTyped *acbBufferOffsets)
: TIntermTraverser(true, true, true, symbolTable), : TIntermTraverser(true, true, true, symbolTable),
mAtomicCounters(atomicCounters), mAtomicCounters(atomicCounters),
mAcbBufferOffsets(acbBufferOffsets),
mCurrentAtomicCounterOffset(0), mCurrentAtomicCounterOffset(0),
mCurrentAtomicCounterBinding(0), mCurrentAtomicCounterBinding(0),
mCurrentAtomicCounterDecl(nullptr), mCurrentAtomicCounterDecl(nullptr),
...@@ -506,7 +534,8 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser ...@@ -506,7 +534,8 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser
TIntermTyped *bindingOffset = mAtomicCounterFunctionCallArgs[param]; TIntermTyped *bindingOffset = mAtomicCounterFunctionCallArgs[param];
TIntermSequence *substituteArguments = new TIntermSequence; TIntermSequence *substituteArguments = new TIntermSequence;
substituteArguments->push_back(CreateAtomicCounterRef(mAtomicCounters, bindingOffset)); substituteArguments->push_back(
CreateAtomicCounterRef(mAtomicCounters, bindingOffset, mAcbBufferOffsets));
substituteArguments->push_back(CreateUIntConstant(valueChange)); substituteArguments->push_back(CreateUIntConstant(valueChange));
TIntermTyped *substituteCall = CreateBuiltInFunctionCallNode( TIntermTyped *substituteCall = CreateBuiltInFunctionCallNode(
...@@ -559,6 +588,7 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser ...@@ -559,6 +588,7 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser
} }
const TVariable *mAtomicCounters; const TVariable *mAtomicCounters;
const TIntermTyped *mAcbBufferOffsets;
// A map from the atomic_uint variable to the binding/offset declaration. // A map from the atomic_uint variable to the binding/offset declaration.
std::unordered_map<const TVariable *, TVariable *> mAtomicCounterBindingOffsets; std::unordered_map<const TVariable *, TVariable *> mAtomicCounterBindingOffsets;
...@@ -585,11 +615,13 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser ...@@ -585,11 +615,13 @@ class RewriteAtomicCountersTraverser : public TIntermTraverser
} // anonymous namespace } // anonymous namespace
void RewriteAtomicCounters(TIntermBlock *root, TSymbolTable *symbolTable) void RewriteAtomicCounters(TIntermBlock *root,
TSymbolTable *symbolTable,
const TIntermTyped *acbBufferOffsets)
{ {
const TVariable *atomicCounters = DeclareAtomicCountersBuffers(root, symbolTable); const TVariable *atomicCounters = DeclareAtomicCountersBuffers(root, symbolTable);
RewriteAtomicCountersTraverser traverser(symbolTable, atomicCounters); RewriteAtomicCountersTraverser traverser(symbolTable, atomicCounters, acbBufferOffsets);
root->traverse(&traverser); root->traverse(&traverser);
traverser.updateTree(); traverser.updateTree();
......
...@@ -12,10 +12,13 @@ ...@@ -12,10 +12,13 @@
namespace sh namespace sh
{ {
class TIntermBlock; class TIntermBlock;
class TIntermTyped;
class TSymbolTable; class TSymbolTable;
class TVariable; class TVariable;
void RewriteAtomicCounters(TIntermBlock *root, TSymbolTable *symbolTable); void RewriteAtomicCounters(TIntermBlock *root,
TSymbolTable *symbolTable,
const TIntermTyped *acbBufferOffsets);
} // namespace sh } // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_REWRITEATOMICCOUNTERS_H_ #endif // COMPILER_TRANSLATOR_TREEOPS_REWRITEATOMICCOUNTERS_H_
...@@ -1589,19 +1589,19 @@ angle::Result State::setIndexedBufferBinding(const Context *context, ...@@ -1589,19 +1589,19 @@ angle::Result State::setIndexedBufferBinding(const Context *context,
const OffsetBindingPointer<Buffer> &State::getIndexedUniformBuffer(size_t index) const const OffsetBindingPointer<Buffer> &State::getIndexedUniformBuffer(size_t index) const
{ {
ASSERT(static_cast<size_t>(index) < mUniformBuffers.size()); ASSERT(index < mUniformBuffers.size());
return mUniformBuffers[index]; return mUniformBuffers[index];
} }
const OffsetBindingPointer<Buffer> &State::getIndexedAtomicCounterBuffer(size_t index) const const OffsetBindingPointer<Buffer> &State::getIndexedAtomicCounterBuffer(size_t index) const
{ {
ASSERT(static_cast<size_t>(index) < mAtomicCounterBuffers.size()); ASSERT(index < mAtomicCounterBuffers.size());
return mAtomicCounterBuffers[index]; return mAtomicCounterBuffers[index];
} }
const OffsetBindingPointer<Buffer> &State::getIndexedShaderStorageBuffer(size_t index) const const OffsetBindingPointer<Buffer> &State::getIndexedShaderStorageBuffer(size_t index) const
{ {
ASSERT(static_cast<size_t>(index) < mShaderStorageBuffers.size()); ASSERT(index < mShaderStorageBuffers.size());
return mShaderStorageBuffers[index]; return mShaderStorageBuffers[index];
} }
......
...@@ -349,6 +349,8 @@ class State : angle::NonCopyable ...@@ -349,6 +349,8 @@ class State : angle::NonCopyable
GLintptr offset, GLintptr offset,
GLsizeiptr size); GLsizeiptr size);
// Returns the number of indexed atomic counter buffer binding slots, i.e. the exclusive upper
// bound for the index accepted by getIndexedAtomicCounterBuffer(). A slot is counted whether or
// not a buffer is currently bound to it.
size_t getAtomicCounterBufferCount() const { return mAtomicCounterBuffers.size(); }
const OffsetBindingPointer<Buffer> &getIndexedUniformBuffer(size_t index) const; const OffsetBindingPointer<Buffer> &getIndexedUniformBuffer(size_t index) const;
const OffsetBindingPointer<Buffer> &getIndexedAtomicCounterBuffer(size_t index) const; const OffsetBindingPointer<Buffer> &getIndexedAtomicCounterBuffer(size_t index) const;
const OffsetBindingPointer<Buffer> &getIndexedShaderStorageBuffer(size_t index) const; const OffsetBindingPointer<Buffer> &getIndexedShaderStorageBuffer(size_t index) const;
......
...@@ -45,6 +45,35 @@ namespace rx ...@@ -45,6 +45,35 @@ namespace rx
namespace namespace
{ {
// For shader uniforms such as gl_DepthRange and the viewport size.
//
// CPU-side image of the driver uniform block used by graphics pipelines. The translator's
// AddGraphicsDriverUniformsToShader declares a field list that mirrors this struct, so members
// must keep the same order and types as that list.
struct GraphicsDriverUniforms
{
std::array<float, 4> viewport;

float halfRenderAreaHeight;
float viewportYScale;
float negViewportYScale;
uint32_t xfbActiveUnpaused;

std::array<int32_t, 4> xfbBufferOffsets;

// .xy contain packed 8-bit values for atomic counter buffer offsets. These offsets are
// within Vulkan's minStorageBufferOffsetAlignment limit and are used to support unaligned
// offsets allowed in GL.
//
// The offset for binding i is stored in element i / 4, at bit position (i % 4) * 8, and is
// expressed in units of uint32_t rather than bytes.
//
// .zw are unused.
std::array<uint32_t, 4> acbBufferOffsets;

// We'll use x, y, z for near / far / diff respectively.
std::array<float, 4> depthRange;
};
// CPU-side image of the driver uniform block used by compute pipelines. The translator's
// AddComputeDriverUniformsToShader declares a field list that mirrors this struct.
struct ComputeDriverUniforms
{
// Atomic counter buffer offsets with the same layout as in GraphicsDriverUniforms.
std::array<uint32_t, 4> acbBufferOffsets;
};
GLenum DefaultGLErrorCode(VkResult result) GLenum DefaultGLErrorCode(VkResult result)
{ {
switch (result) switch (result)
...@@ -66,7 +95,7 @@ constexpr VkBufferUsageFlags kVertexBufferUsage = VK_BUFFER_USAGE_VERTEX_BUFFER_ ...@@ -66,7 +95,7 @@ constexpr VkBufferUsageFlags kVertexBufferUsage = VK_BUFFER_USAGE_VERTEX_BUFFER_
constexpr size_t kDefaultValueSize = sizeof(gl::VertexAttribCurrentValueData::Values); constexpr size_t kDefaultValueSize = sizeof(gl::VertexAttribCurrentValueData::Values);
constexpr size_t kDefaultBufferSize = kDefaultValueSize * 16; constexpr size_t kDefaultBufferSize = kDefaultValueSize * 16;
constexpr size_t kDefaultPoolAllocatorPageSize = 16 * 1024; constexpr size_t kDefaultPoolAllocatorPageSize = 16 * 1024;
constexpr size_t kDriverUniformsBufferSize = 64; constexpr size_t kDriverUniformsAllocatorPageSize = 4 * 1024;
// Wait a maximum of 10s. If that times out, we declare it a failure. // Wait a maximum of 10s. If that times out, we declare it a failure.
constexpr uint64_t kMaxFenceWaitTimeNs = 10'000'000'000llu; constexpr uint64_t kMaxFenceWaitTimeNs = 10'000'000'000llu;
...@@ -143,6 +172,27 @@ void ApplySampleCoverage(const gl::State &glState, ...@@ -143,6 +172,27 @@ void ApplySampleCoverage(const gl::State &glState,
} // anonymous namespace } // anonymous namespace
// Starts out with no Vulkan descriptor set and a zero dynamic offset. The backing dynamic buffer
// is set up separately by init().
ContextVk::DriverUniformsDescriptorSet::DriverUniformsDescriptorSet()
    : descriptorSet(VK_NULL_HANDLE),
      dynamicOffset(0)
{
}

// Nothing is released here; Vulkan resources are torn down explicitly through destroy().
ContextVk::DriverUniformsDescriptorSet::~DriverUniformsDescriptorSet() = default;
// Initializes the dynamic buffer that backs this set's driver uniforms.
//
// The buffer is created for uniform-buffer usage and sub-allocates with the device's
// minUniformBufferOffsetAlignment, using kDriverUniformsAllocatorPageSize as its size argument.
void ContextVk::DriverUniformsDescriptorSet::init(RendererVk *rendererVk)
{
    const auto &deviceLimits = rendererVk->getPhysicalDeviceProperties().limits;
    const size_t uniformOffsetAlignment =
        static_cast<size_t>(deviceLimits.minUniformBufferOffsetAlignment);

    dynamicBuffer.init(rendererVk, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, uniformOffsetAlignment,
                       kDriverUniformsAllocatorPageSize, true);
}
// Releases everything this set holds: drops its references to the descriptor set layout and the
// descriptor pool binding, then destroys the dynamic buffer on the given device.
// NOTE(review): the descriptor pool itself is destroyed by the caller (see ContextVk::onDestroy),
// after every set has been destroyed - confirm this ordering is required.
void ContextVk::DriverUniformsDescriptorSet::destroy(VkDevice device)
{
descriptorSetLayout.reset();
descriptorPoolBinding.reset();
dynamicBuffer.destroy(device);
}
// CommandBatch implementation. // CommandBatch implementation.
ContextVk::CommandBatch::CommandBatch() = default; ContextVk::CommandBatch::CommandBatch() = default;
...@@ -183,8 +233,6 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk ...@@ -183,8 +233,6 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mClearColorMask(kAllColorChannelsMask), mClearColorMask(kAllColorChannelsMask),
mFlipYForCurrentSurface(false), mFlipYForCurrentSurface(false),
mIsAnyHostVisibleBufferWritten(false), mIsAnyHostVisibleBufferWritten(false),
mDriverUniformsDescriptorSet(VK_NULL_HANDLE),
mDriverUniformsDynamicOffset(0),
mLastCompletedQueueSerial(renderer->nextSerial()), mLastCompletedQueueSerial(renderer->nextSerial()),
mCurrentQueueSerial(renderer->nextSerial()), mCurrentQueueSerial(renderer->nextSerial()),
mPoolAllocator(kDefaultPoolAllocatorPageSize, 1), mPoolAllocator(kDefaultPoolAllocatorPageSize, 1),
...@@ -233,6 +281,8 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk ...@@ -233,6 +281,8 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mComputeDirtyBitHandlers[DIRTY_BIT_PIPELINE] = &ContextVk::handleDirtyComputePipeline; mComputeDirtyBitHandlers[DIRTY_BIT_PIPELINE] = &ContextVk::handleDirtyComputePipeline;
mComputeDirtyBitHandlers[DIRTY_BIT_TEXTURES] = &ContextVk::handleDirtyComputeTextures; mComputeDirtyBitHandlers[DIRTY_BIT_TEXTURES] = &ContextVk::handleDirtyComputeTextures;
mComputeDirtyBitHandlers[DIRTY_BIT_DRIVER_UNIFORMS] =
&ContextVk::handleDirtyComputeDriverUniforms;
mComputeDirtyBitHandlers[DIRTY_BIT_SHADER_RESOURCES] = mComputeDirtyBitHandlers[DIRTY_BIT_SHADER_RESOURCES] =
&ContextVk::handleDirtyComputeShaderResources; &ContextVk::handleDirtyComputeShaderResources;
mComputeDirtyBitHandlers[DIRTY_BIT_DESCRIPTOR_SETS] = mComputeDirtyBitHandlers[DIRTY_BIT_DESCRIPTOR_SETS] =
...@@ -256,10 +306,13 @@ void ContextVk::onDestroy(const gl::Context *context) ...@@ -256,10 +306,13 @@ void ContextVk::onDestroy(const gl::Context *context)
VkDevice device = getDevice(); VkDevice device = getDevice();
mDriverUniformsSetLayout.reset();
mIncompleteTextures.onDestroy(context); mIncompleteTextures.onDestroy(context);
mDriverUniformsBuffer.destroy(device);
mDriverUniformsDescriptorPoolBinding.reset(); for (DriverUniformsDescriptorSet &driverUniforms : mDriverUniforms)
{
driverUniforms.destroy(device);
}
mDriverUniformsDescriptorPool.destroy(device); mDriverUniformsDescriptorPool.destroy(device);
for (vk::DynamicBuffer &defaultBuffer : mDefaultAttribBuffers) for (vk::DynamicBuffer &defaultBuffer : mDefaultAttribBuffers)
...@@ -316,14 +369,20 @@ angle::Result ContextVk::initialize() ...@@ -316,14 +369,20 @@ angle::Result ContextVk::initialize()
ANGLE_TRY(mQueryPools[gl::QueryType::TimeElapsed].init(this, VK_QUERY_TYPE_TIMESTAMP, ANGLE_TRY(mQueryPools[gl::QueryType::TimeElapsed].init(this, VK_QUERY_TYPE_TIMESTAMP,
vk::kDefaultTimestampQueryPoolSize)); vk::kDefaultTimestampQueryPoolSize));
size_t minAlignment = static_cast<size_t>( // Init driver uniforms and get the descriptor set layouts.
mRenderer->getPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment); constexpr angle::PackedEnumMap<PipelineType, VkShaderStageFlags> kPipelineStages = {
mDriverUniformsBuffer.init(mRenderer, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, minAlignment, {PipelineType::Graphics, VK_SHADER_STAGE_ALL_GRAPHICS},
sizeof(DriverUniforms) * kDriverUniformsBufferSize, true); {PipelineType::Compute, VK_SHADER_STAGE_COMPUTE_BIT},
};
for (PipelineType pipeline : angle::AllEnums<PipelineType>())
{
mDriverUniforms[pipeline].init(mRenderer);
// Get the descriptor set layout. vk::DescriptorSetLayoutDesc desc =
vk::DescriptorSetLayoutDesc desc = getDriverUniformsDescriptorSetDesc(); getDriverUniformsDescriptorSetDesc(kPipelineStages[pipeline]);
ANGLE_TRY(mRenderer->getDescriptorSetLayout(this, desc, &mDriverUniformsSetLayout)); ANGLE_TRY(mRenderer->getDescriptorSetLayout(
this, desc, &mDriverUniforms[pipeline].descriptorSetLayout));
}
mGraphicsPipelineDesc.reset(new vk::GraphicsPipelineDesc()); mGraphicsPipelineDesc.reset(new vk::GraphicsPipelineDesc());
mGraphicsPipelineDesc->initDefaults(); mGraphicsPipelineDesc->initDefaults();
...@@ -435,7 +494,7 @@ angle::Result ContextVk::setupDraw(const gl::Context *context, ...@@ -435,7 +494,7 @@ angle::Result ContextVk::setupDraw(const gl::Context *context,
if (mState.isTransformFeedbackActiveUnpaused()) if (mState.isTransformFeedbackActiveUnpaused())
{ {
mXfbBaseVertex = firstVertex; mXfbBaseVertex = firstVertex;
invalidateDriverUniforms(); invalidateGraphicsDriverUniforms();
} }
DirtyBits dirtyBits = mGraphicsDirtyBits & dirtyBitMask; DirtyBits dirtyBits = mGraphicsDirtyBits & dirtyBitMask;
...@@ -716,23 +775,33 @@ angle::Result ContextVk::handleDirtyGraphicsTransformFeedbackBuffers( ...@@ -716,23 +775,33 @@ angle::Result ContextVk::handleDirtyGraphicsTransformFeedbackBuffers(
return angle::Result::Continue; return angle::Result::Continue;
} }
angle::Result ContextVk::handleDirtyGraphicsDescriptorSets(const gl::Context *context, ANGLE_INLINE angle::Result ContextVk::handleDirtyDescriptorSetsImpl(
vk::CommandBuffer *commandBuffer) vk::CommandBuffer *commandBuffer,
VkPipelineBindPoint bindPoint,
const DriverUniformsDescriptorSet &driverUniforms)
{ {
ANGLE_TRY(mProgram->updateDescriptorSets(this, commandBuffer)); ANGLE_TRY(mProgram->updateDescriptorSets(this, commandBuffer));
// Bind the graphics descriptor sets. // Bind the driver descriptor set.
commandBuffer->bindDescriptorSets( commandBuffer->bindDescriptorSets(
mProgram->getPipelineLayout(), VK_PIPELINE_BIND_POINT_GRAPHICS, mProgram->getPipelineLayout(), bindPoint, kDriverUniformsDescriptorSetIndex, 1,
kDriverUniformsDescriptorSetIndex, 1, &mDriverUniformsDescriptorSet, 1, &driverUniforms.descriptorSet, 1, &driverUniforms.dynamicOffset);
&mDriverUniformsDynamicOffset);
return angle::Result::Continue; return angle::Result::Continue;
} }
angle::Result ContextVk::handleDirtyGraphicsDescriptorSets(const gl::Context *context,
vk::CommandBuffer *commandBuffer)
{
return handleDirtyDescriptorSetsImpl(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
mDriverUniforms[PipelineType::Graphics]);
}
angle::Result ContextVk::handleDirtyComputeDescriptorSets(const gl::Context *context, angle::Result ContextVk::handleDirtyComputeDescriptorSets(const gl::Context *context,
vk::CommandBuffer *commandBuffer) vk::CommandBuffer *commandBuffer)
{ {
return mProgram->updateDescriptorSets(this, commandBuffer); return handleDirtyDescriptorSetsImpl(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
mDriverUniforms[PipelineType::Compute]);
} }
angle::Result ContextVk::submitFrame(const VkSubmitInfo &submitInfo, angle::Result ContextVk::submitFrame(const VkSubmitInfo &submitInfo,
...@@ -1451,12 +1520,12 @@ void ContextVk::updateViewport(FramebufferVk *framebufferVk, ...@@ -1451,12 +1520,12 @@ void ContextVk::updateViewport(FramebufferVk *framebufferVk,
gl_vk::GetViewport(correctedRect, nearPlane, farPlane, invertViewport, gl_vk::GetViewport(correctedRect, nearPlane, farPlane, invertViewport,
framebufferVk->getState().getDimensions().height, &vkViewport); framebufferVk->getState().getDimensions().height, &vkViewport);
mGraphicsPipelineDesc->updateViewport(&mGraphicsPipelineTransition, vkViewport); mGraphicsPipelineDesc->updateViewport(&mGraphicsPipelineTransition, vkViewport);
invalidateDriverUniforms(); invalidateGraphicsDriverUniforms();
} }
void ContextVk::updateDepthRange(float nearPlane, float farPlane) void ContextVk::updateDepthRange(float nearPlane, float farPlane)
{ {
invalidateDriverUniforms(); invalidateGraphicsDriverUniforms();
mGraphicsPipelineDesc->updateDepthRange(&mGraphicsPipelineTransition, nearPlane, farPlane); mGraphicsPipelineDesc->updateDepthRange(&mGraphicsPipelineTransition, nearPlane, farPlane);
} }
...@@ -1766,6 +1835,7 @@ angle::Result ContextVk::syncState(const gl::Context *context, ...@@ -1766,6 +1835,7 @@ angle::Result ContextVk::syncState(const gl::Context *context,
break; break;
case gl::State::DIRTY_BIT_ATOMIC_COUNTER_BUFFER_BINDING: case gl::State::DIRTY_BIT_ATOMIC_COUNTER_BUFFER_BINDING:
invalidateCurrentShaderResources(); invalidateCurrentShaderResources();
invalidateDriverUniforms();
break; break;
case gl::State::DIRTY_BIT_IMAGE_BINDINGS: case gl::State::DIRTY_BIT_IMAGE_BINDINGS:
break; break;
...@@ -1998,10 +2068,18 @@ void ContextVk::invalidateCurrentShaderResources() ...@@ -1998,10 +2068,18 @@ void ContextVk::invalidateCurrentShaderResources()
} }
} }
// Marks only the graphics pipeline's driver uniforms as stale. Use this for state that has no
// compute counterpart (viewport, depth range, transform feedback); invalidateDriverUniforms()
// additionally dirties the compute pipeline.
void ContextVk::invalidateGraphicsDriverUniforms()
{
mGraphicsDirtyBits.set(DIRTY_BIT_DRIVER_UNIFORMS);
// The descriptor set is bound with the dynamic offset of the uniform allocation, so it is
// dirtied together with the uniforms.
mGraphicsDirtyBits.set(DIRTY_BIT_DESCRIPTOR_SETS);
}
void ContextVk::invalidateDriverUniforms() void ContextVk::invalidateDriverUniforms()
{ {
mGraphicsDirtyBits.set(DIRTY_BIT_DRIVER_UNIFORMS); mGraphicsDirtyBits.set(DIRTY_BIT_DRIVER_UNIFORMS);
mGraphicsDirtyBits.set(DIRTY_BIT_DESCRIPTOR_SETS); mGraphicsDirtyBits.set(DIRTY_BIT_DESCRIPTOR_SETS);
mComputeDirtyBits.set(DIRTY_BIT_DRIVER_UNIFORMS);
mComputeDirtyBits.set(DIRTY_BIT_DESCRIPTOR_SETS);
} }
void ContextVk::onDrawFramebufferChange(FramebufferVk *framebufferVk) void ContextVk::onDrawFramebufferChange(FramebufferVk *framebufferVk)
...@@ -2021,7 +2099,7 @@ void ContextVk::invalidateCurrentTransformFeedbackBuffers() ...@@ -2021,7 +2099,7 @@ void ContextVk::invalidateCurrentTransformFeedbackBuffers()
void ContextVk::onTransformFeedbackPauseResume() void ContextVk::onTransformFeedbackPauseResume()
{ {
invalidateDriverUniforms(); invalidateGraphicsDriverUniforms();
} }
angle::Result ContextVk::dispatchCompute(const gl::Context *context, angle::Result ContextVk::dispatchCompute(const gl::Context *context,
...@@ -2068,7 +2146,7 @@ angle::Result ContextVk::memoryBarrier(const gl::Context *context, GLbitfield ba ...@@ -2068,7 +2146,7 @@ angle::Result ContextVk::memoryBarrier(const gl::Context *context, GLbitfield ba
dstAccess |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; dstAccess |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
} }
mCommandGraph.memoryBarrier(srcAccess, dstAccess, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT); mCommandGraph.memoryBarrier(srcAccess, dstAccess, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
return angle::Result::Continue; return angle::Result::Continue;
} }
...@@ -2109,26 +2187,58 @@ VkColorComponentFlags ContextVk::getClearColorMask() const ...@@ -2109,26 +2187,58 @@ VkColorComponentFlags ContextVk::getClearColorMask() const
return mClearColorMask; return mClearColorMask;
} }
angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *context, void ContextVk::writeAtomicCounterBufferDriverUniformOffsets(uint32_t *offsetsOut,
vk::CommandBuffer *commandBuffer) size_t offsetsSize)
{ {
// Release any previously retained buffers. const VkDeviceSize offsetAlignment =
mDriverUniformsBuffer.releaseInFlightBuffers(this); mRenderer->getPhysicalDeviceProperties().limits.minStorageBufferOffsetAlignment;
size_t atomicCounterBufferCount = mState.getAtomicCounterBufferCount();
const gl::Rectangle &glViewport = mState.getViewport(); ASSERT(atomicCounterBufferCount <= offsetsSize * 4);
float halfRenderAreaHeight =
static_cast<float>(mDrawFramebuffer->getState().getDimensions().height) * 0.5f; for (uint32_t bufferIndex = 0; bufferIndex < atomicCounterBufferCount; ++bufferIndex)
{
uint32_t offsetDiff = 0;
const gl::OffsetBindingPointer<gl::Buffer> *atomicCounterBuffer =
&mState.getIndexedAtomicCounterBuffer(bufferIndex);
if (atomicCounterBuffer->get())
{
VkDeviceSize offset = atomicCounterBuffer->getOffset();
VkDeviceSize alignedOffset = (offset / offsetAlignment) * offsetAlignment;
// GL requires the atomic counter buffer offset to be aligned with uint.
ASSERT((offset - alignedOffset) % sizeof(uint32_t) == 0);
offsetDiff = static_cast<uint32_t>((offset - alignedOffset) / sizeof(uint32_t));
// We expect offsetDiff to fit in an 8-bit value. The maximum difference is
// minStorageBufferOffsetAlignment / 4, where minStorageBufferOffsetAlignment currently
// has a maximum value of 256 on any device.
ASSERT(offsetDiff < (1 << 8));
}
// The output array is already cleared prior to this call.
ASSERT(bufferIndex % 4 != 0 || offsetsOut[bufferIndex / 4] == 0);
offsetsOut[bufferIndex / 4] |= static_cast<uint8_t>(offsetDiff) << ((bufferIndex % 4) * 8);
}
}
angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *context,
vk::CommandBuffer *commandBuffer)
{
// Allocate a new region in the dynamic buffer. // Allocate a new region in the dynamic buffer.
uint8_t *ptr; uint8_t *ptr;
VkBuffer buffer; VkBuffer buffer;
VkDeviceSize offset;
bool newBuffer; bool newBuffer;
ANGLE_TRY(mDriverUniformsBuffer.allocate(this, sizeof(DriverUniforms), &ptr, &buffer, &offset, ANGLE_TRY(allocateDriverUniforms(sizeof(GraphicsDriverUniforms),
&newBuffer)); &mDriverUniforms[PipelineType::Graphics], &buffer, &ptr,
float scaleY = isViewportFlipEnabledForDrawFBO() ? -1.0f : 1.0f; &newBuffer));
mDriverUniformsDynamicOffset = static_cast<uint32_t>(offset); const gl::Rectangle &glViewport = mState.getViewport();
float halfRenderAreaHeight =
static_cast<float>(mDrawFramebuffer->getState().getDimensions().height) * 0.5f;
float scaleY = isViewportFlipEnabledForDrawFBO() ? -1.0f : 1.0f;
uint32_t xfbActiveUnpaused = mState.isTransformFeedbackActiveUnpaused(); uint32_t xfbActiveUnpaused = mState.isTransformFeedbackActiveUnpaused();
...@@ -2137,8 +2247,8 @@ angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *co ...@@ -2137,8 +2247,8 @@ angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *co
float depthRangeDiff = depthRangeFar - depthRangeNear; float depthRangeDiff = depthRangeFar - depthRangeNear;
// Copy and flush to the device. // Copy and flush to the device.
DriverUniforms *driverUniforms = reinterpret_cast<DriverUniforms *>(ptr); GraphicsDriverUniforms *driverUniforms = reinterpret_cast<GraphicsDriverUniforms *>(ptr);
*driverUniforms = { *driverUniforms = {
{static_cast<float>(glViewport.x), static_cast<float>(glViewport.y), {static_cast<float>(glViewport.x), static_cast<float>(glViewport.y),
static_cast<float>(glViewport.width), static_cast<float>(glViewport.height)}, static_cast<float>(glViewport.width), static_cast<float>(glViewport.height)},
halfRenderAreaHeight, halfRenderAreaHeight,
...@@ -2146,6 +2256,7 @@ angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *co ...@@ -2146,6 +2256,7 @@ angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *co
-scaleY, -scaleY,
xfbActiveUnpaused, xfbActiveUnpaused,
{}, {},
{},
{depthRangeNear, depthRangeFar, depthRangeDiff, 0.0f}}; {depthRangeNear, depthRangeFar, depthRangeDiff, 0.0f}};
if (xfbActiveUnpaused) if (xfbActiveUnpaused)
...@@ -2157,35 +2268,91 @@ angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *co ...@@ -2157,35 +2268,91 @@ angle::Result ContextVk::handleDirtyGraphicsDriverUniforms(const gl::Context *co
driverUniforms->xfbBufferOffsets.size()); driverUniforms->xfbBufferOffsets.size());
} }
ANGLE_TRY(mDriverUniformsBuffer.flush(this)); writeAtomicCounterBufferDriverUniformOffsets(driverUniforms->acbBufferOffsets.data(),
driverUniforms->acbBufferOffsets.size());
if (newBuffer) return updateDriverUniformsDescriptorSet(buffer, newBuffer, sizeof(GraphicsDriverUniforms),
{ &mDriverUniforms[PipelineType::Graphics]);
// Allocate a new descriptor set. }
ANGLE_TRY(mDriverUniformsDescriptorPool.allocateSets(
this, mDriverUniformsSetLayout.get().ptr(), 1, &mDriverUniformsDescriptorPoolBinding,
&mDriverUniformsDescriptorSet));
// Update the driver uniform descriptor set. angle::Result ContextVk::handleDirtyComputeDriverUniforms(const gl::Context *context,
VkDescriptorBufferInfo bufferInfo = {}; vk::CommandBuffer *commandBuffer)
bufferInfo.buffer = buffer; {
bufferInfo.offset = 0; // Allocate a new region in the dynamic buffer.
bufferInfo.range = sizeof(DriverUniforms); uint8_t *ptr;
VkBuffer buffer;
bool newBuffer;
ANGLE_TRY(allocateDriverUniforms(sizeof(ComputeDriverUniforms),
&mDriverUniforms[PipelineType::Compute], &buffer, &ptr,
&newBuffer));
// Copy and flush to the device.
ComputeDriverUniforms *driverUniforms = reinterpret_cast<ComputeDriverUniforms *>(ptr);
*driverUniforms = {};
writeAtomicCounterBufferDriverUniformOffsets(driverUniforms->acbBufferOffsets.data(),
driverUniforms->acbBufferOffsets.size());
return updateDriverUniformsDescriptorSet(buffer, newBuffer, sizeof(ComputeDriverUniforms),
&mDriverUniforms[PipelineType::Compute]);
}
// Reserves |driverUniformsSize| bytes in the dynamic buffer owned by |driverUniforms| for a new
// copy of the driver uniforms.
//
// The pointer, VkBuffer handle and "new buffer" flag produced by the allocation are handed back
// through |ptrOut|, |bufferOut| and |newBufferOut| respectively.  The offset of the allocation is
// not returned; it is stored in |driverUniforms->dynamicOffset| instead.
//
// Returns early (via ANGLE_TRY) if the allocation fails.
angle::Result ContextVk::allocateDriverUniforms(size_t driverUniformsSize,
                                                DriverUniformsDescriptorSet *driverUniforms,
                                                VkBuffer *bufferOut,
                                                uint8_t **ptrOut,
                                                bool *newBufferOut)
{
    vk::DynamicBuffer &uniformsBuffer = driverUniforms->dynamicBuffer;

    // Buffers retained by earlier allocations are released before requesting more space.
    uniformsBuffer.releaseInFlightBuffers(this);

    // Request a fresh region from the dynamic buffer.
    VkDeviceSize allocationOffset = 0;
    ANGLE_TRY(uniformsBuffer.allocate(this, driverUniformsSize, ptrOut, bufferOut,
                                      &allocationOffset, newBufferOut));

    // The dynamic offset is kept as a 32-bit value.
    driverUniforms->dynamicOffset = static_cast<uint32_t>(allocationOffset);

    return angle::Result::Continue;
}
VkWriteDescriptorSet writeInfo = {}; angle::Result ContextVk::updateDriverUniformsDescriptorSet(
writeInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; VkBuffer buffer,
writeInfo.dstSet = mDriverUniformsDescriptorSet; bool newBuffer,
writeInfo.dstBinding = 0; size_t driverUniformsSize,
writeInfo.dstArrayElement = 0; DriverUniformsDescriptorSet *driverUniforms)
writeInfo.descriptorCount = 1; {
writeInfo.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; ANGLE_TRY(driverUniforms->dynamicBuffer.flush(this));
writeInfo.pImageInfo = nullptr;
writeInfo.pTexelBufferView = nullptr;
writeInfo.pBufferInfo = &bufferInfo;
vkUpdateDescriptorSets(getDevice(), 1, &writeInfo, 0, nullptr); if (!newBuffer)
{
return angle::Result::Continue;
} }
// Allocate a new descriptor set.
ANGLE_TRY(mDriverUniformsDescriptorPool.allocateSets(
this, driverUniforms->descriptorSetLayout.get().ptr(), 1,
&driverUniforms->descriptorPoolBinding, &driverUniforms->descriptorSet));
// Update the driver uniform descriptor set.
VkDescriptorBufferInfo bufferInfo = {};
bufferInfo.buffer = buffer;
bufferInfo.offset = 0;
bufferInfo.range = driverUniformsSize;
VkWriteDescriptorSet writeInfo = {};
writeInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeInfo.dstSet = driverUniforms->descriptorSet;
writeInfo.dstBinding = 0;
writeInfo.dstArrayElement = 0;
writeInfo.descriptorCount = 1;
writeInfo.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
writeInfo.pImageInfo = nullptr;
writeInfo.pTexelBufferView = nullptr;
writeInfo.pBufferInfo = &bufferInfo;
vkUpdateDescriptorSets(getDevice(), 1, &writeInfo, 0, nullptr);
return angle::Result::Continue; return angle::Result::Continue;
} }
...@@ -2643,10 +2810,11 @@ void ContextVk::waitForSwapchainImageIfNecessary() ...@@ -2643,10 +2810,11 @@ void ContextVk::waitForSwapchainImageIfNecessary()
} }
} }
vk::DescriptorSetLayoutDesc ContextVk::getDriverUniformsDescriptorSetDesc() const vk::DescriptorSetLayoutDesc ContextVk::getDriverUniformsDescriptorSetDesc(
VkShaderStageFlags shaderStages) const
{ {
vk::DescriptorSetLayoutDesc desc; vk::DescriptorSetLayoutDesc desc;
desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_ALL_GRAPHICS); desc.update(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, shaderStages);
return desc; return desc;
} }
} // namespace rx } // namespace rx
...@@ -300,7 +300,8 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO ...@@ -300,7 +300,8 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
RenderPassCache &getRenderPassCache() { return mRenderPassCache; } RenderPassCache &getRenderPassCache() { return mRenderPassCache; }
vk::DescriptorSetLayoutDesc getDriverUniformsDescriptorSetDesc() const; vk::DescriptorSetLayoutDesc getDriverUniformsDescriptorSetDesc(
VkShaderStageFlags shaderStages) const;
// We use texture serials to optimize texture binding updates. Each permutation of a // We use texture serials to optimize texture binding updates. Each permutation of a
// {VkImage/VkSampler} generates a unique serial. These serials are combined to form a unique // {VkImage/VkSampler} generates a unique serial. These serials are combined to form a unique
...@@ -335,6 +336,15 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO ...@@ -335,6 +336,15 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
std::array<DirtyBitHandler, DIRTY_BIT_MAX> mGraphicsDirtyBitHandlers; std::array<DirtyBitHandler, DIRTY_BIT_MAX> mGraphicsDirtyBitHandlers;
std::array<DirtyBitHandler, DIRTY_BIT_MAX> mComputeDirtyBitHandlers; std::array<DirtyBitHandler, DIRTY_BIT_MAX> mComputeDirtyBitHandlers;
// Identifies the pipeline a piece of per-pipeline state belongs to.  Used as the key type of
// angle::PackedEnumMap (see mDriverUniforms), hence the trailing InvalidEnum / EnumCount
// enumerators, which both equal the number of real pipeline types.
enum class PipelineType
{
    Graphics,
    Compute,

    InvalidEnum,
    EnumCount = InvalidEnum,
};
angle::Result setupDraw(const gl::Context *context, angle::Result setupDraw(const gl::Context *context,
gl::PrimitiveMode mode, gl::PrimitiveMode mode,
GLint firstVertex, GLint firstVertex,
...@@ -386,6 +396,7 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO ...@@ -386,6 +396,7 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
void invalidateCurrentTextures(); void invalidateCurrentTextures();
void invalidateCurrentShaderResources(); void invalidateCurrentShaderResources();
void invalidateGraphicsDriverUniforms();
void invalidateDriverUniforms(); void invalidateDriverUniforms();
// Handlers for graphics pipeline dirty bits. // Handlers for graphics pipeline dirty bits.
...@@ -413,6 +424,8 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO ...@@ -413,6 +424,8 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
vk::CommandBuffer *commandBuffer); vk::CommandBuffer *commandBuffer);
angle::Result handleDirtyComputeTextures(const gl::Context *context, angle::Result handleDirtyComputeTextures(const gl::Context *context,
vk::CommandBuffer *commandBuffer); vk::CommandBuffer *commandBuffer);
angle::Result handleDirtyComputeDriverUniforms(const gl::Context *context,
vk::CommandBuffer *commandBuffer);
angle::Result handleDirtyComputeShaderResources(const gl::Context *context, angle::Result handleDirtyComputeShaderResources(const gl::Context *context,
vk::CommandBuffer *commandBuffer); vk::CommandBuffer *commandBuffer);
angle::Result handleDirtyComputeDescriptorSets(const gl::Context *context, angle::Result handleDirtyComputeDescriptorSets(const gl::Context *context,
...@@ -425,6 +438,21 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO ...@@ -425,6 +438,21 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
angle::Result handleDirtyShaderResourcesImpl(const gl::Context *context, angle::Result handleDirtyShaderResourcesImpl(const gl::Context *context,
vk::CommandBuffer *commandBuffer, vk::CommandBuffer *commandBuffer,
vk::CommandGraphResource *recorder); vk::CommandGraphResource *recorder);
struct DriverUniformsDescriptorSet;
angle::Result handleDirtyDescriptorSetsImpl(vk::CommandBuffer *commandBuffer,
VkPipelineBindPoint bindPoint,
const DriverUniformsDescriptorSet &driverUniforms);
angle::Result allocateDriverUniforms(size_t driverUniformsSize,
DriverUniformsDescriptorSet *driverUniforms,
VkBuffer *bufferOut,
uint8_t **ptrOut,
bool *newBufferOut);
angle::Result updateDriverUniformsDescriptorSet(VkBuffer buffer,
bool newBuffer,
size_t driverUniformsSize,
DriverUniformsDescriptorSet *driverUniforms);
void writeAtomicCounterBufferDriverUniformOffsets(uint32_t *offsetsOut, size_t offsetsSize);
angle::Result submitFrame(const VkSubmitInfo &submitInfo, angle::Result submitFrame(const VkSubmitInfo &submitInfo,
vk::PrimaryCommandBuffer &&commandBuffer); vk::PrimaryCommandBuffer &&commandBuffer);
...@@ -503,27 +531,22 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO ...@@ -503,27 +531,22 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
// at the end of the command buffer to make that write available to the host. // at the end of the command buffer to make that write available to the host.
bool mIsAnyHostVisibleBufferWritten; bool mIsAnyHostVisibleBufferWritten;
// For shader uniforms such as gl_DepthRange and the viewport size. struct DriverUniformsDescriptorSet
struct DriverUniforms
{ {
std::array<float, 4> viewport; vk::DynamicBuffer dynamicBuffer;
VkDescriptorSet descriptorSet;
float halfRenderAreaHeight; uint32_t dynamicOffset;
float viewportYScale; vk::BindingPointer<vk::DescriptorSetLayout> descriptorSetLayout;
float negViewportYScale; vk::RefCountedDescriptorPoolBinding descriptorPoolBinding;
uint32_t xfbActiveUnpaused;
std::array<int32_t, 4> xfbBufferOffsets; DriverUniformsDescriptorSet();
~DriverUniformsDescriptorSet();
// We'll use x, y, z for near / far / diff respectively. void init(RendererVk *rendererVk);
std::array<float, 4> depthRange; void destroy(VkDevice device);
}; };
vk::DynamicBuffer mDriverUniformsBuffer; angle::PackedEnumMap<PipelineType, DriverUniformsDescriptorSet> mDriverUniforms;
VkDescriptorSet mDriverUniformsDescriptorSet;
uint32_t mDriverUniformsDynamicOffset;
vk::BindingPointer<vk::DescriptorSetLayout> mDriverUniformsSetLayout;
vk::RefCountedDescriptorPoolBinding mDriverUniformsDescriptorPoolBinding;
// This cache should also probably include the texture index (shader location) and array // This cache should also probably include the texture index (shader location) and array
// index (also in the shader). This info is used in the descriptor update step. // index (also in the shader). This info is used in the descriptor update step.
......
...@@ -668,12 +668,6 @@ void AssignUniformBindings(gl::ShaderMap<IntermediateShaderSource> *shaderSource ...@@ -668,12 +668,6 @@ void AssignUniformBindings(gl::ShaderMap<IntermediateShaderSource> *shaderSource
} }
} }
if (!(*shaderSources)[gl::ShaderType::Compute].empty())
{
// Compute doesn't need driver uniforms.
return;
}
// Substitute layout and qualifier strings for the driver uniforms block. // Substitute layout and qualifier strings for the driver uniforms block.
const std::string driverBlockLayoutString = const std::string driverBlockLayoutString =
"set = " + Str(kDriverUniformsDescriptorSetIndex) + ", binding = 0"; "set = " + Str(kDriverUniformsDescriptorSetIndex) + ", binding = 0";
......
...@@ -229,6 +229,7 @@ void WriteBufferDescriptorSetBinding(const gl::OffsetBindingPointer<gl::Buffer> ...@@ -229,6 +229,7 @@ void WriteBufferDescriptorSetBinding(const gl::OffsetBindingPointer<gl::Buffer>
VkDescriptorType descType, VkDescriptorType descType,
uint32_t bindingIndex, uint32_t bindingIndex,
uint32_t arrayElement, uint32_t arrayElement,
VkDeviceSize requiredOffsetAlignment,
VkDescriptorBufferInfo *bufferInfoOut, VkDescriptorBufferInfo *bufferInfoOut,
VkWriteDescriptorSet *writeInfoOut) VkWriteDescriptorSet *writeInfoOut)
{ {
...@@ -241,7 +242,7 @@ void WriteBufferDescriptorSetBinding(const gl::OffsetBindingPointer<gl::Buffer> ...@@ -241,7 +242,7 @@ void WriteBufferDescriptorSetBinding(const gl::OffsetBindingPointer<gl::Buffer>
ASSERT(bufferBinding.getSize() >= 0); ASSERT(bufferBinding.getSize() >= 0);
BufferVk *bufferVk = vk::GetImpl(buffer); BufferVk *bufferVk = vk::GetImpl(buffer);
GLintptr offset = bufferBinding.getOffset(); VkDeviceSize offset = bufferBinding.getOffset();
VkDeviceSize size = bufferBinding.getSize(); VkDeviceSize size = bufferBinding.getSize();
vk::BufferHelper &bufferHelper = bufferVk->getBuffer(); vk::BufferHelper &bufferHelper = bufferVk->getBuffer();
...@@ -255,6 +256,19 @@ void WriteBufferDescriptorSetBinding(const gl::OffsetBindingPointer<gl::Buffer> ...@@ -255,6 +256,19 @@ void WriteBufferDescriptorSetBinding(const gl::OffsetBindingPointer<gl::Buffer>
size = std::min(size, maxSize); size = std::min(size, maxSize);
} }
// If requiredOffsetAlignment is 0, the buffer offset is guaranteed to have the necessary
// alignment through other means (the backend specifying the alignment through a GLES limit that
// the frontend then enforces). If it's not 0, we need to bind the buffer at an offset that's
// aligned. The difference in offsets is communicated to the shader via driver uniforms.
if (requiredOffsetAlignment)
{
VkDeviceSize alignedOffset = (offset / requiredOffsetAlignment) * requiredOffsetAlignment;
VkDeviceSize offsetDiff = offset - alignedOffset;
offset = alignedOffset;
size += offsetDiff;
}
bufferInfoOut->buffer = bufferHelper.getBuffer().getHandle(); bufferInfoOut->buffer = bufferHelper.getBuffer().getHandle();
bufferInfoOut->offset = offset; bufferInfoOut->offset = offset;
bufferInfoOut->range = size; bufferInfoOut->range = size;
...@@ -565,8 +579,10 @@ angle::Result ProgramVk::linkImpl(const gl::Context *glContext, gl::InfoLog &inf ...@@ -565,8 +579,10 @@ angle::Result ProgramVk::linkImpl(const gl::Context *glContext, gl::InfoLog &inf
ANGLE_TRY(renderer->getDescriptorSetLayout(contextVk, texturesSetDesc, ANGLE_TRY(renderer->getDescriptorSetLayout(contextVk, texturesSetDesc,
&mDescriptorSetLayouts[kTextureDescriptorSetIndex])); &mDescriptorSetLayouts[kTextureDescriptorSetIndex]));
VkShaderStageFlags driverUniformsStages =
mState.isCompute() ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_ALL_GRAPHICS;
vk::DescriptorSetLayoutDesc driverUniformsSetDesc = vk::DescriptorSetLayoutDesc driverUniformsSetDesc =
contextVk->getDriverUniformsDescriptorSetDesc(); contextVk->getDriverUniformsDescriptorSetDesc(driverUniformsStages);
ANGLE_TRY(renderer->getDescriptorSetLayout( ANGLE_TRY(renderer->getDescriptorSetLayout(
contextVk, driverUniformsSetDesc, contextVk, driverUniformsSetDesc,
&mDescriptorSetLayouts[kDriverUniformsDescriptorSetIndex])); &mDescriptorSetLayouts[kDriverUniformsDescriptorSetIndex]));
...@@ -1238,7 +1254,7 @@ void ProgramVk::updateBuffersDescriptorSet(ContextVk *contextVk, ...@@ -1238,7 +1254,7 @@ void ProgramVk::updateBuffersDescriptorSet(ContextVk *contextVk,
VkWriteDescriptorSet &writeInfo = writeDescriptorInfo[writeCount]; VkWriteDescriptorSet &writeInfo = writeDescriptorInfo[writeCount];
WriteBufferDescriptorSetBinding(bufferBinding, maxBlockSize, descriptorSet, descriptorType, WriteBufferDescriptorSetBinding(bufferBinding, maxBlockSize, descriptorSet, descriptorType,
binding, arrayElement, &bufferInfo, &writeInfo); binding, arrayElement, 0, &bufferInfo, &writeInfo);
BufferVk *bufferVk = vk::GetImpl(bufferBinding.get()); BufferVk *bufferVk = vk::GetImpl(bufferBinding.get());
vk::BufferHelper &bufferHelper = bufferVk->getBuffer(); vk::BufferHelper &bufferHelper = bufferVk->getBuffer();
...@@ -1281,6 +1297,10 @@ void ProgramVk::updateAtomicCounterBuffersDescriptorSet(ContextVk *contextVk, ...@@ -1281,6 +1297,10 @@ void ProgramVk::updateAtomicCounterBuffersDescriptorSet(ContextVk *contextVk,
gl::AtomicCounterBuffersArray<VkWriteDescriptorSet> writeDescriptorInfo; gl::AtomicCounterBuffersArray<VkWriteDescriptorSet> writeDescriptorInfo;
gl::AtomicCounterBufferMask writtenBindings; gl::AtomicCounterBufferMask writtenBindings;
RendererVk *rendererVk = contextVk->getRenderer();
const VkDeviceSize requiredOffsetAlignment =
rendererVk->getPhysicalDeviceProperties().limits.minStorageBufferOffsetAlignment;
// Write atomic counter buffers. // Write atomic counter buffers.
for (uint32_t bufferIndex = 0; bufferIndex < atomicCounterBuffers.size(); ++bufferIndex) for (uint32_t bufferIndex = 0; bufferIndex < atomicCounterBuffers.size(); ++bufferIndex)
{ {
...@@ -1299,7 +1319,7 @@ void ProgramVk::updateAtomicCounterBuffersDescriptorSet(ContextVk *contextVk, ...@@ -1299,7 +1319,7 @@ void ProgramVk::updateAtomicCounterBuffersDescriptorSet(ContextVk *contextVk,
WriteBufferDescriptorSetBinding(bufferBinding, 0, descriptorSet, WriteBufferDescriptorSetBinding(bufferBinding, 0, descriptorSet,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStart, binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStart, binding,
&bufferInfo, &writeInfo); requiredOffsetAlignment, &bufferInfo, &writeInfo);
BufferVk *bufferVk = vk::GetImpl(bufferBinding.get()); BufferVk *bufferVk = vk::GetImpl(bufferBinding.get());
vk::BufferHelper &bufferHelper = bufferVk->getBuffer(); vk::BufferHelper &bufferHelper = bufferVk->getBuffer();
......
...@@ -105,9 +105,6 @@ ...@@ -105,9 +105,6 @@
// Blend equations: // Blend equations:
3586 VULKAN : KHR-GLES31.core.blend_equation_advanced.* = SKIP 3586 VULKAN : KHR-GLES31.core.blend_equation_advanced.* = SKIP
// Atomic Counter buffers:
3566 VULKAN : KHR-GLES31.core.shader_atomic_counters.advanced-usage-multi-stage = FAIL
// Storage image: // Storage image:
3563 VULKAN : KHR-GLES31.core.layout_binding.sampler2D_layout_binding_texture_ComputeShader = FAIL 3563 VULKAN : KHR-GLES31.core.layout_binding.sampler2D_layout_binding_texture_ComputeShader = FAIL
3563 VULKAN : KHR-GLES31.core.layout_binding.block_layout_binding_block_ComputeShader = FAIL 3563 VULKAN : KHR-GLES31.core.layout_binding.block_layout_binding_block_ComputeShader = FAIL
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment