Commit 2450d8dd by Jamie Madill Committed by Commit Bot

Vulkan: Squash Vertex Pipeline info.

Instead of using 8 bytes per vertex we can reduce the space used for the divisor to 8 bits. For larger values than 255 we can emulate the divisor by unrolling the draw call. We will likely need to do this in any case for instanced draws when the instancing extension isn't available. The tighter packing will allow us to move the viewport and scissor back into the pipeline description. It seems this is much faster than using dynamic state. Every state change that would pull in a new Pipeline would need the viewport and scissor re-applied. It seems these driver calls are costly. Does not improve perf significantly but enables future improvements. Bug: angleproject:3013 Change-Id: I1a41c3acadc6fbd47c7a7b961c706e82f78de936 Reviewed-on: https://chromium-review.googlesource.com/c/1390358 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
parent a3b64e86
...@@ -280,7 +280,7 @@ DestT Int4Array_Get(const uint8_t *arrayBytes, uint32_t arrayIndex) ...@@ -280,7 +280,7 @@ DestT Int4Array_Get(const uint8_t *arrayBytes, uint32_t arrayIndex)
// Helper macro that casts to a bitfield type then verifies no bits were dropped. // Helper macro that casts to a bitfield type then verifies no bits were dropped.
#define SetBitField(lhs, rhs) \ #define SetBitField(lhs, rhs) \
lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs); \ lhs = static_cast<typename std::decay<decltype(lhs)>::type>(rhs); \
ASSERT(static_cast<decltype(rhs)>(lhs) == rhs); ASSERT(static_cast<decltype(rhs)>(lhs) == (rhs));
// When converting a byte number to a transition bit index we can shift instead of divide. // When converting a byte number to a transition bit index we can shift instead of divide.
constexpr size_t kTransitionByteShift = Log2(kGraphicsPipelineDirtyBitBytes); constexpr size_t kTransitionByteShift = Log2(kGraphicsPipelineDirtyBitBytes);
...@@ -568,19 +568,19 @@ angle::Result GraphicsPipelineDesc::initializePipeline( ...@@ -568,19 +568,19 @@ angle::Result GraphicsPipelineDesc::initializePipeline(
{ {
const uint32_t attribIndex = static_cast<uint32_t>(attribIndexSizeT); const uint32_t attribIndex = static_cast<uint32_t>(attribIndexSizeT);
VkVertexInputBindingDescription &bindingDesc = bindingDescs[vertexAttribCount]; VkVertexInputBindingDescription &bindingDesc = bindingDescs[vertexAttribCount];
VkVertexInputAttributeDescription &attribDesc = attributeDescs[vertexAttribCount]; VkVertexInputAttributeDescription &attribDesc = attributeDescs[vertexAttribCount];
const PackedVertexInputBindingDesc &packedBinding = mVertexInputBindings[attribIndex]; const PackedAttribDesc &packedAttrib = mVertexInputAttribs.attribs[attribIndex];
bindingDesc.binding = attribIndex; bindingDesc.binding = attribIndex;
bindingDesc.inputRate = static_cast<VkVertexInputRate>(packedBinding.inputRate); bindingDesc.inputRate = static_cast<VkVertexInputRate>(packedAttrib.inputRate);
bindingDesc.stride = static_cast<uint32_t>(packedBinding.stride); bindingDesc.stride = static_cast<uint32_t>(packedAttrib.stride);
// The binding or location might change in future ES versions. // The binding index could become more dynamic in ES 3.1.
attribDesc.binding = attribIndex; attribDesc.binding = attribIndex;
attribDesc.format = static_cast<VkFormat>(mVertexInputAttribs.formats[attribIndex]); attribDesc.format = static_cast<VkFormat>(packedAttrib.format);
attribDesc.location = static_cast<uint32_t>(attribIndex); attribDesc.location = static_cast<uint32_t>(attribIndex);
attribDesc.offset = mVertexInputAttribs.offsets[attribIndex]; attribDesc.offset = packedAttrib.offset;
vertexAttribCount++; vertexAttribCount++;
} }
...@@ -728,25 +728,30 @@ void GraphicsPipelineDesc::updateVertexInput(GraphicsPipelineTransitionBits *tra ...@@ -728,25 +728,30 @@ void GraphicsPipelineDesc::updateVertexInput(GraphicsPipelineTransitionBits *tra
VkFormat format, VkFormat format,
GLuint relativeOffset) GLuint relativeOffset)
{ {
vk::PackedVertexInputBindingDesc &bindingDesc = mVertexInputBindings[attribIndex]; vk::PackedAttribDesc &packedAttrib = mVertexInputAttribs.attribs[attribIndex];
bindingDesc.stride = static_cast<uint16_t>(stride);
bindingDesc.inputRate = static_cast<uint16_t>(divisor > 0 ? VK_VERTEX_INPUT_RATE_INSTANCE // TODO(http://anglebug.com/2672): This will need to be updated to support instancing.
: VK_VERTEX_INPUT_RATE_VERTEX); ASSERT(divisor == 0);
constexpr size_t kBindingBaseBit =
offsetof(GraphicsPipelineDesc, mVertexInputBindings) >> kTransitionByteShift; SetBitField(packedAttrib.stride, stride);
transition->set(kBindingBaseBit + attribIndex); SetBitField(packedAttrib.inputRate,
static_assert(kVertexInputBindingSize == 4, "Size mismatch"); divisor > 0 ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX);
if (format == VK_FORMAT_UNDEFINED) if (format == VK_FORMAT_UNDEFINED)
{ {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
SetBitField(mVertexInputAttribs.formats[attribIndex], format); SetBitField(packedAttrib.format, format);
SetBitField(mVertexInputAttribs.offsets[attribIndex], relativeOffset); SetBitField(packedAttrib.offset, relativeOffset);
transition->set(ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, formats, attribIndex, 8));
transition->set( constexpr size_t kAttribBits = kPackedAttribDescSize * kBitsPerByte;
ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, offsets, attribIndex, 16)); const size_t kBit =
ANGLE_GET_INDEXED_TRANSITION_BIT(mVertexInputAttribs, attribs, attribIndex, kAttribBits);
// Cover the next dirty bit conservatively. Because each attribute is 6 bytes.
transition->set(kBit);
transition->set(kBit + 1);
} }
void GraphicsPipelineDesc::updateTopology(GraphicsPipelineTransitionBits *transition, void GraphicsPipelineDesc::updateTopology(GraphicsPipelineTransitionBits *transition,
......
...@@ -124,28 +124,33 @@ bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs); ...@@ -124,28 +124,33 @@ bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);
static_assert(sizeof(AttachmentOpsArray) == 80, "Size check failed"); static_assert(sizeof(AttachmentOpsArray) == 80, "Size check failed");
struct PackedVertexInputBindingDesc final struct PackedAttribDesc final
{ {
// Although techncially stride can be any value in ES 2.0, in practice supporting stride uint8_t format;
// inputRate can also be used to store instancing divisors up to 255.
// TODO(http://anglebug.com/2672): Emulate divisors greater than UBYTE_MAX.
uint8_t inputRate;
// Can only take 11 bits on NV.
uint16_t offset;
// Although technically stride can be any value in ES 2.0, in practice supporting stride
// greater than MAX_USHORT should not be that helpful. Note that stride limits are // greater than MAX_USHORT should not be that helpful. Note that stride limits are
// introduced in ES 3.1. // introduced in ES 3.1.
uint16_t stride; uint16_t stride;
uint16_t inputRate;
}; };
constexpr size_t kVertexInputBindingSize = sizeof(PackedVertexInputBindingDesc); constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kVertexInputBindingSize == 4, "Size check failed"); static_assert(kPackedAttribDescSize == 6, "Size mismatch");
using VertexInputBindings = gl::AttribArray<PackedVertexInputBindingDesc>;
constexpr size_t kVertexInputBindingsSize = sizeof(VertexInputBindings);
struct VertexInputAttributes final struct VertexInputAttributes final
{ {
uint8_t formats[gl::MAX_VERTEX_ATTRIBS]; PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];
uint16_t offsets[gl::MAX_VERTEX_ATTRIBS]; // can only take 11 bits on NV
}; };
constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes); constexpr size_t kVertexInputAttributesSize = sizeof(VertexInputAttributes);
static_assert(kVertexInputAttributesSize == 96, "Size mismatch");
struct RasterizationStateBits final struct RasterizationStateBits final
{ {
...@@ -274,9 +279,9 @@ constexpr size_t kPackedInputAssemblyAndColorBlendStateSize = ...@@ -274,9 +279,9 @@ constexpr size_t kPackedInputAssemblyAndColorBlendStateSize =
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed"); static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
constexpr size_t kGraphicsPipelineDescSumOfSizes = constexpr size_t kGraphicsPipelineDescSumOfSizes =
kVertexInputBindingsSize + kVertexInputAttributesSize + kVertexInputAttributesSize + kPackedInputAssemblyAndColorBlendStateSize +
kPackedInputAssemblyAndColorBlendStateSize + kPackedRasterizationAndMultisampleStateSize + kPackedRasterizationAndMultisampleStateSize + kPackedDepthStencilStateSize +
kPackedDepthStencilStateSize + kRenderPassDescSize; kRenderPassDescSize;
// Number of dirty bits in the dirty bit set. // Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4; constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
...@@ -398,7 +403,6 @@ class GraphicsPipelineDesc final ...@@ -398,7 +403,6 @@ class GraphicsPipelineDesc final
const gl::RasterizerState &rasterState); const gl::RasterizerState &rasterState);
private: private:
VertexInputBindings mVertexInputBindings;
VertexInputAttributes mVertexInputAttribs; VertexInputAttributes mVertexInputAttribs;
RenderPassDesc mRenderPassDesc; RenderPassDesc mRenderPassDesc;
PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo; PackedRasterizationAndMultisampleStateInfo mRasterizationAndMultisampleStateInfo;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment