Commit 71c1138d by Tobin Ehlis Committed by Commit Bot

Vulkan: Emulate instanced attrib divisor

This sets the instancedArrays[ANGLE|EXT] extensions as always supported regardless of the underlying Vulkan HW's max vertex attrib divisor. Then detect instances where the app sets a divisor that isn't supported by hardware and emulate those cases. Emulation is accomplished by copying the instanced attribs to a new buffer where each attrib is present once per instance, using the attrib divisor value as a factor to replicate the attribs, and then setting the actual divisor value for the draw to "1". Also, we only store 8 bits for the divisor used in the PSO, so this code also handles emulation of the case where the divisor is > 255. This is passing all of the drawInstanced/Elements dEQP tests where the divisor has to be emulated. Also enabled end2end InstancingTestES3 for the Vulkan backend. Bug: angleproject:2672 Change-Id: I9932f9eab49b16a19e8bbd35dacaf3b5a27a213f Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1758689 Reviewed-by: Courtney Goeltzenleuchter <courtneygo@google.com> Commit-Queue: Tobin Ehlis <tobine@google.com>
parent cb16fb5f
......@@ -477,12 +477,14 @@ angle::Result ContextVk::setupDraw(const gl::Context *context,
}
// Must be called before the command buffer is started. Can call finish.
if (context->getStateCache().hasAnyActiveClientAttrib())
if (mVertexArray->getStreamingVertexAttribsMask().any())
{
ASSERT(firstVertexOrInvalid != -1);
ANGLE_TRY(mVertexArray->updateClientAttribs(context, firstVertexOrInvalid,
vertexOrIndexCount, instanceCount,
indexTypeOrInvalid, indices));
// All client attribs & any emulated buffered attribs will be updated
ANGLE_TRY(mVertexArray->updateStreamedAttribs(context, firstVertexOrInvalid,
vertexOrIndexCount, instanceCount,
indexTypeOrInvalid, indices));
mGraphicsDirtyBits.set(DIRTY_BIT_VERTEX_BUFFERS);
}
......
......@@ -213,9 +213,10 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
GLuint relativeOffset)
{
invalidateVertexAndIndexBuffers();
mGraphicsPipelineDesc->updateVertexInput(&mGraphicsPipelineTransition,
static_cast<uint32_t>(attribIndex), stride,
divisor, format, relativeOffset);
// Set divisor to 1 for attribs with emulated divisor
mGraphicsPipelineDesc->updateVertexInput(
&mGraphicsPipelineTransition, static_cast<uint32_t>(attribIndex), stride,
divisor > mRenderer->getMaxVertexAttribDivisor() ? 1 : divisor, format, relativeOffset);
}
void invalidateDefaultAttribute(size_t attribIndex);
......
......@@ -1056,7 +1056,11 @@ angle::Result RendererVk::initializeDevice(DisplayVk *displayVk, uint32_t queueF
deviceProperties.pNext = &divisorProperties;
vkGetPhysicalDeviceProperties2KHR(mPhysicalDevice, &deviceProperties);
mMaxVertexAttribDivisor = divisorProperties.maxVertexAttribDivisor;
// We only store 8 bit divisor in GraphicsPipelineDesc so capping value & we emulate if
// exceeded
mMaxVertexAttribDivisor =
std::min(divisorProperties.maxVertexAttribDivisor,
static_cast<uint32_t>(std::numeric_limits<uint8_t>::max()));
createInfo.pNext = &enabledFeatures;
}
......
......@@ -125,6 +125,7 @@ class RendererVk : angle::NonCopyable
ASSERT(mFeaturesInitialized);
return mFeatures;
}
// Returns the stored mMaxVertexAttribDivisor. initializeDevice() assigns it from the
// device's maxVertexAttribDivisor property, capped at the uint8_t maximum; callers compare
// a binding's divisor against this value to decide whether the divisor must be emulated.
uint32_t getMaxVertexAttribDivisor() const { return mMaxVertexAttribDivisor; }
// Returns true when the enabled ICD (mEnabledICD) is vk::ICD::Mock.
bool isMockICDEnabled() const { return mEnabledICD == vk::ICD::Mock; }
......
......@@ -54,17 +54,37 @@ angle::Result StreamVertexData(ContextVk *contextVk,
size_t bytesToAllocate,
size_t destOffset,
size_t vertexCount,
size_t stride,
size_t sourceStride,
size_t destStride,
VertexCopyFunction vertexLoadFunction,
vk::BufferHelper **bufferOut,
VkDeviceSize *bufferOffsetOut)
VkDeviceSize *bufferOffsetOut,
uint32_t replicateCount)
{
uint8_t *dst = nullptr;
ANGLE_TRY(dynamicBuffer->allocate(contextVk, bytesToAllocate, &dst, nullptr, bufferOffsetOut,
nullptr));
*bufferOut = dynamicBuffer->getCurrentBuffer();
dst += destOffset;
vertexLoadFunction(sourceData, stride, vertexCount, dst);
if (replicateCount == 1)
{
vertexLoadFunction(sourceData, sourceStride, vertexCount, dst);
}
else
{
ASSERT(replicateCount > 1);
uint32_t sourceRemainingCount = replicateCount - 1;
for (size_t dataCopied = 0; dataCopied < bytesToAllocate;
dataCopied += destStride, dst += destStride, sourceRemainingCount--)
{
vertexLoadFunction(sourceData, sourceStride, 1, dst);
if (sourceRemainingCount == 0)
{
sourceData += sourceStride;
sourceRemainingCount = replicateCount;
}
}
}
ANGLE_TRY(dynamicBuffer->flush(contextVk));
return angle::Result::Continue;
......@@ -334,9 +354,9 @@ angle::Result VertexArrayVk::convertVertexBufferCPU(ContextVk *contextVk,
srcBytes += binding.getOffset() + relativeOffset;
ASSERT(GetVertexInputAlignment(vertexFormat) <= vk::kVertexBufferAlignment);
ANGLE_TRY(StreamVertexData(contextVk, &conversion->data, srcBytes, numVertices * dstFormatSize,
0, numVertices, binding.getStride(), vertexFormat.vertexLoadFunction,
&mCurrentArrayBuffers[attribIndex],
&conversion->lastAllocationOffset));
0, numVertices, binding.getStride(), srcFormatSize,
vertexFormat.vertexLoadFunction, &mCurrentArrayBuffers[attribIndex],
&conversion->lastAllocationOffset, 1));
srcBuffer->unmapImpl(contextVk);
ASSERT(conversion->dirty);
......@@ -460,6 +480,11 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
GLuint stride;
bool anyVertexBufferConvertedOnGpu = false;
gl::Buffer *bufferGL = binding.getBuffer().get();
// Emulated and/or client-side attribs will be streamed
bool isStreamingVertexAttrib =
(binding.getDivisor() > renderer->getMaxVertexAttribDivisor()) || (bufferGL == nullptr);
mStreamingVertexAttribsMask.set(attribIndex, isStreamingVertexAttrib);
if (bufferGL)
{
BufferVk *bufferVk = vk::GetImpl(bufferGL);
......@@ -563,17 +588,24 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
return angle::Result::Continue;
}
angle::Result VertexArrayVk::updateClientAttribs(const gl::Context *context,
GLint firstVertex,
GLsizei vertexOrIndexCount,
GLsizei instanceCount,
gl::DrawElementsType indexTypeOrInvalid,
const void *indices)
// Handle copying client attribs and/or expanding attrib buffer in case where attribute
// divisor value has to be emulated.
angle::Result VertexArrayVk::updateStreamedAttribs(const gl::Context *context,
GLint firstVertex,
GLsizei vertexOrIndexCount,
GLsizei instanceCount,
gl::DrawElementsType indexTypeOrInvalid,
const void *indices)
{
ContextVk *contextVk = vk::GetImpl(context);
const gl::AttributesMask &clientAttribs = context->getStateCache().getActiveClientAttribsMask();
const gl::AttributesMask activeAttribs =
context->getStateCache().getActiveClientAttribsMask() |
context->getStateCache().getActiveBufferedAttribsMask();
const gl::AttributesMask activeStreamedAttribs = mStreamingVertexAttribsMask & activeAttribs;
ASSERT(clientAttribs.any());
// Early return for corner case where emulated buffered attribs are not active
if (!activeStreamedAttribs.any())
return angle::Result::Continue;
GLint startVertex;
size_t vertexCount;
......@@ -586,13 +618,13 @@ angle::Result VertexArrayVk::updateClientAttribs(const gl::Context *context,
const auto &attribs = mState.getVertexAttributes();
const auto &bindings = mState.getVertexBindings();
// TODO(fjhenigman): When we have a bunch of interleaved attributes, they end up
// TODO: When we have a bunch of interleaved attributes, they end up
// un-interleaved, wasting space and copying time. Consider improving on that.
for (size_t attribIndex : clientAttribs)
for (size_t attribIndex : activeStreamedAttribs)
{
const gl::VertexAttribute &attrib = attribs[attribIndex];
const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
ASSERT(attrib.enabled && binding.getBuffer().get() == nullptr);
ASSERT(attrib.enabled);
const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
GLuint stride = vertexFormat.bufferFormat().pixelBytes;
......@@ -600,19 +632,51 @@ angle::Result VertexArrayVk::updateClientAttribs(const gl::Context *context,
ASSERT(GetVertexInputAlignment(vertexFormat) <= vk::kVertexBufferAlignment);
const uint8_t *src = static_cast<const uint8_t *>(attrib.pointer);
if (binding.getDivisor() > 0)
const uint32_t divisor = binding.getDivisor();
if (divisor > 0)
{
// instanced attrib
size_t count = UnsignedCeilDivide(instanceCount, binding.getDivisor());
size_t bytesToAllocate = count * stride;
ANGLE_TRY(StreamVertexData(contextVk, &mDynamicVertexData, src, bytesToAllocate, 0,
count, binding.getStride(), vertexFormat.vertexLoadFunction,
&mCurrentArrayBuffers[attribIndex],
&mCurrentArrayBufferOffsets[attribIndex]));
// Instanced attrib
if (divisor > renderer->getMaxVertexAttribDivisor())
{
// Emulated attrib
BufferVk *bufferVk = nullptr;
if (binding.getBuffer().get() != nullptr)
{
// Map buffer to expand attribs for divisor emulation
bufferVk = vk::GetImpl(binding.getBuffer().get());
void *buffSrc = nullptr;
ANGLE_TRY(bufferVk->mapImpl(contextVk, &buffSrc));
src = reinterpret_cast<const uint8_t *>(buffSrc);
}
// Divisor will be set to 1 & so update buffer to have 1 attrib per instance
size_t bytesToAllocate = instanceCount * stride;
ANGLE_TRY(StreamVertexData(contextVk, &mDynamicVertexData, src, bytesToAllocate, 0,
instanceCount, binding.getStride(), stride,
vertexFormat.vertexLoadFunction,
&mCurrentArrayBuffers[attribIndex],
&mCurrentArrayBufferOffsets[attribIndex], divisor));
if (bufferVk)
{
bufferVk->unmapImpl(contextVk);
}
}
else
{
ASSERT(binding.getBuffer().get() == nullptr);
size_t count = UnsignedCeilDivide(instanceCount, divisor);
size_t bytesToAllocate = count * stride;
ANGLE_TRY(StreamVertexData(contextVk, &mDynamicVertexData, src, bytesToAllocate, 0,
count, binding.getStride(), stride,
vertexFormat.vertexLoadFunction,
&mCurrentArrayBuffers[attribIndex],
&mCurrentArrayBufferOffsets[attribIndex], 1));
}
}
else
{
ASSERT(binding.getBuffer().get() == nullptr);
// Allocate space for startVertex + vertexCount so indexing will work. If we don't
// start at zero all the indices will be off.
// Only vertexCount vertices will be used by the upcoming draw so that is all we copy.
......@@ -622,8 +686,8 @@ angle::Result VertexArrayVk::updateClientAttribs(const gl::Context *context,
ANGLE_TRY(StreamVertexData(
contextVk, &mDynamicVertexData, src, bytesToAllocate, destOffset, vertexCount,
binding.getStride(), vertexFormat.vertexLoadFunction,
&mCurrentArrayBuffers[attribIndex], &mCurrentArrayBufferOffsets[attribIndex]));
binding.getStride(), stride, vertexFormat.vertexLoadFunction,
&mCurrentArrayBuffers[attribIndex], &mCurrentArrayBufferOffsets[attribIndex], 1));
}
mCurrentArrayBufferHandles[attribIndex] =
......
......@@ -37,12 +37,12 @@ class VertexArrayVk : public VertexArrayImpl
VkBuffer bufferHandle,
uint32_t offset);
angle::Result updateClientAttribs(const gl::Context *context,
GLint firstVertex,
GLsizei vertexOrIndexCount,
GLsizei instanceCount,
gl::DrawElementsType indexTypeOrInvalid,
const void *indices);
angle::Result updateStreamedAttribs(const gl::Context *context,
GLint firstVertex,
GLsizei vertexOrIndexCount,
GLsizei instanceCount,
gl::DrawElementsType indexTypeOrInvalid,
const void *indices);
angle::Result handleLineLoop(ContextVk *contextVk,
GLint firstVertex,
......@@ -92,6 +92,11 @@ class VertexArrayVk : public VertexArrayImpl
size_t indexCount,
const void *sourcePointer);
// Returns the mask of attribs whose data has to be streamed: client-side attribs (no buffer
// bound) and attribs whose divisor exceeds the renderer's max vertex attrib divisor and is
// therefore emulated. The mask is updated per attrib in syncDirtyAttrib().
const gl::AttributesMask &getStreamingVertexAttribsMask() const
{
return mStreamingVertexAttribsMask;
}
private:
void setDefaultPackedInput(ContextVk *contextVk, size_t attribIndex);
......@@ -133,6 +138,9 @@ class VertexArrayVk : public VertexArrayImpl
// Vulkan does not allow binding a null vertex buffer. We use a dummy as a placeholder.
vk::BufferHelper mTheNullBuffer;
// Track client and/or emulated attribs that we have to stream their buffer contents
gl::AttributesMask mStreamingVertexAttribsMask;
};
} // namespace rx
......
......@@ -16,6 +16,7 @@
#include "libANGLE/renderer/vulkan/FramebufferVk.h"
#include "libANGLE/renderer/vulkan/ProgramVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/renderer/vulkan/VertexArrayVk.h"
#include "libANGLE/renderer/vulkan/vk_format_utils.h"
#include "libANGLE/renderer/vulkan/vk_helpers.h"
......@@ -673,7 +674,6 @@ angle::Result GraphicsPipelineDesc::initializePipeline(
VkPipelineVertexInputDivisorStateCreateInfoEXT divisorState = {};
divisorState.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT;
divisorState.pVertexBindingDivisors = divisorDesc.data();
for (size_t attribIndexSizeT : activeAttribLocationsMask)
{
const uint32_t attribIndex = static_cast<uint32_t>(attribIndexSizeT);
......@@ -885,10 +885,6 @@ void GraphicsPipelineDesc::updateVertexInput(GraphicsPipelineTransitionBits *tra
{
vk::PackedAttribDesc &packedAttrib = mVertexInputAttribs.attribs[attribIndex];
// TODO: Handle the case where the divisor overflows the field that holds it.
// http://anglebug.com/2672
ASSERT(divisor <= std::numeric_limits<decltype(packedAttrib.divisor)>::max());
SetBitField(packedAttrib.stride, stride);
SetBitField(packedAttrib.divisor, divisor);
......
......@@ -38,7 +38,7 @@ using RefCountedPipelineLayout = RefCounted<PipelineLayout>;
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something likely we will want to investigate as the Vulkan implementation progresses.
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
......@@ -164,8 +164,6 @@ static_assert(sizeof(AttachmentOpsArray) == 20, "Size check failed");
struct PackedAttribDesc final
{
uint8_t format;
// TODO(http://anglebug.com/2672): Emulate divisors greater than UBYTE_MAX.
uint8_t divisor;
// Can only take 11 bits on NV.
......@@ -315,9 +313,9 @@ constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
constexpr size_t kGraphicsPipelineDescSumOfSizes =
kVertexInputAttributesSize + kPackedInputAssemblyAndColorBlendStateSize +
kPackedRasterizationAndMultisampleStateSize + kPackedDepthStencilStateSize +
kRenderPassDescSize + sizeof(VkViewport) + sizeof(VkRect2D);
kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize + sizeof(VkViewport) +
sizeof(VkRect2D);
// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
......
......@@ -82,10 +82,9 @@ void RendererVk::ensureCapsInitialized() const
mNativeExtensions.vertexHalfFloat = true;
// TODO: Enable this always and emulate instanced draws if any divisor exceeds the maximum
// supported. http://anglebug.com/2672
mNativeExtensions.instancedArraysANGLE = mMaxVertexAttribDivisor > 1;
mNativeExtensions.instancedArraysEXT = mMaxVertexAttribDivisor > 1;
// Enabled in HW if VK_EXT_vertex_attribute_divisor available, otherwise emulated
mNativeExtensions.instancedArraysANGLE = true;
mNativeExtensions.instancedArraysEXT = true;
// Only expose robust buffer access if the physical device supports it.
mNativeExtensions.robustBufferAccessBehavior =
......
......@@ -555,16 +555,6 @@
3219 VULKAN : dEQP-GLES3.functional.negative_api.shader.link_program = FAIL
3219 VULKAN : dEQP-GLES3.functional.negative_api.shader.use_program = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.attribute_divisor.2*_instances = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.attribute_divisor.4_instances = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.mixed.2*_instances = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.mixed.4_instances = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.draw_arrays_instanced.attribute_divisor.2*_instances = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.draw_arrays_instanced.attribute_divisor.4_instances = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.draw_arrays_instanced.mixed.2*_instances = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.draw_arrays_instanced.mixed.4_instances = FAIL
2672 VULKAN : dEQP-GLES3.functional.instanced.types* = FAIL
// Polygon offset:
3678 VULKAN : dEQP-GLES3.functional.polygon_offset.float32_result_depth_clamp = FAIL
3678 VULKAN : dEQP-GLES3.functional.polygon_offset.float32_factor_1_slope = FAIL
......
......@@ -588,7 +588,7 @@ TEST_P(InstancingTestES3, LargestDivisor)
<< "Vertex attrib divisor read was not the same that was passed in.";
}
ANGLE_INSTANTIATE_TEST(InstancingTestES3, ES3_OPENGL(), ES3_OPENGLES(), ES3_D3D11());
ANGLE_INSTANTIATE_TEST(InstancingTestES3, ES3_OPENGL(), ES3_OPENGLES(), ES3_D3D11(), ES3_VULKAN());
ANGLE_INSTANTIATE_TEST(InstancingTestES31, ES31_OPENGL(), ES31_OPENGLES(), ES31_D3D11());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment