Commit 44063c80 by Jamie Madill Committed by Commit Bot

Vulkan: Store array buffer conversions in BufferVk.

The intent of this CL is to call convertVertexBufferCPU/convertVertexBufferGPU only when we have new data to convert. If the app unbinds and rebinds a vertex buffer without changing the data, we can now retrieve the cached vertex buffer info from the BufferVk class. Previously we would always reconvert the data on a rebind. This was slowing down applications and benchmarks. To achieve this we add a conversion cache to BufferVk. Each cache entry stores a key based on the vertex info. We also store a ring buffer for each cache entry and a flag to indicate if the entry is dirty. The cache is dirtied on a buffer data update or a map call. Improves performance in the T-Rex benchmark. Bug: angleproject:3495 Change-Id: Ia999c9187510748ba95bc98362eb332e1990d270 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1638903 Commit-Queue: Jamie Madill <jmadill@chromium.org> Reviewed-by: Courtney Goeltzenleuchter <courtneygo@google.com>
parent e431aaa1
......@@ -26,8 +26,44 @@ namespace
// On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to read the
// last n bytes. By rounding up the buffer sizes to a multiple of 4, the problem is alleviated.
constexpr size_t kBufferSizeGranularity = 4;
// Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;
} // namespace
// ConversionBuffer implementation.
// Constructs a conversion buffer whose storage is a vk::DynamicBuffer.
//
// The entry starts out marked dirty with a last-allocation offset of zero. |usageFlags| and
// |initialSize| are forwarded to the vk::DynamicBuffer constructor; |alignment| and |renderer|
// are forwarded to vk::DynamicBuffer::init.
// NOTE(review): the meaning of the trailing `true` constructor argument is defined by
// vk::DynamicBuffer and is not visible here - confirm against its declaration.
ConversionBuffer::ConversionBuffer(RendererVk *renderer,
VkBufferUsageFlags usageFlags,
size_t initialSize,
size_t alignment)
: dirty(true), lastAllocationOffset(0), data(usageFlags, initialSize, true)
{
data.init(alignment, renderer);
}
// The destructor and move constructor use the compiler-generated implementations; they are
// defined out of line in this translation unit.
ConversionBuffer::~ConversionBuffer() = default;
ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;
// BufferVk::VertexConversionBuffer implementation.
// Constructs a cache entry for one vertex conversion.
//
// The base ConversionBuffer is created with the vertex buffer usage flags, the initial size
// kConvertedArrayBufferInitialSize and the vertex buffer alignment. |formatIDIn|, |strideIn|
// and |offsetIn| are stored unchanged in the formatID, stride and offset members.
BufferVk::VertexConversionBuffer::VertexConversionBuffer(RendererVk *renderer,
angle::FormatID formatIDIn,
GLuint strideIn,
size_t offsetIn)
: ConversionBuffer(renderer,
vk::kVertexBufferUsageFlags,
kConvertedArrayBufferInitialSize,
vk::kVertexBufferAlignment),
formatID(formatIDIn),
stride(strideIn),
offset(offsetIn)
{}
// The move constructor and destructor use the compiler-generated implementations; they are
// defined out of line in this translation unit.
BufferVk::VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;
BufferVk::VertexConversionBuffer::~VertexConversionBuffer() = default;
// BufferVk implementation.
// Constructs the Vulkan buffer implementation for |state|. The recorded write-access flags
// start at zero.
BufferVk::BufferVk(const gl::BufferState &state) : BufferImpl(state), mDataWriteAccessFlags(0) {}
// The destructor body is empty; the Vulkan buffers are released through release().
BufferVk::~BufferVk() {}
......@@ -42,6 +78,11 @@ void BufferVk::destroy(const gl::Context *context)
// Releases the Vulkan resources owned by this object: the main buffer first, followed by the
// backing storage of every cached vertex conversion. The cache entries themselves stay in
// mVertexConversionBuffers; only their dynamic buffers are released.
void BufferVk::release(ContextVk *contextVk)
{
    mBuffer.release(contextVk);

    for (auto entry = mVertexConversionBuffers.begin(); entry != mVertexConversionBuffers.end();
         ++entry)
    {
        entry->data.release(contextVk);
    }
}
angle::Result BufferVk::setData(const gl::Context *context,
......@@ -156,6 +197,8 @@ angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
mBuffer.getDeviceMemory().unmap(contextVk->getDevice());
mDataWriteAccessFlags = VK_ACCESS_HOST_WRITE_BIT;
markConversionBuffersDirty();
return angle::Result::Continue;
}
......@@ -222,7 +265,7 @@ angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
size_t size,
size_t offset)
{
VkDevice device = contextVk->getDevice();
VkDevice device = contextVk->getDevice();
// Use map when available.
if (mBuffer.isResourceInUse(contextVk))
......@@ -260,6 +303,9 @@ angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
mDataWriteAccessFlags = VK_ACCESS_HOST_WRITE_BIT;
}
// Update conversions
markConversionBuffersDirty();
return angle::Result::Continue;
}
......@@ -278,4 +324,29 @@ angle::Result BufferVk::copyToBuffer(ContextVk *contextVk,
return angle::Result::Continue;
}
// Returns the cached conversion buffer identified by {formatID, stride, offset}, creating it
// when no such entry exists yet.
//
// A newly created entry is appended to mVertexConversionBuffers and starts out dirty (see the
// ConversionBuffer constructor), so the caller is expected to run a conversion before using it.
//
// The returned pointer refers to an element of a std::vector: a later call that appends a new
// entry may reallocate the vector and invalidate pointers returned earlier. Use the result
// immediately rather than storing it.
ConversionBuffer *BufferVk::getVertexConversionBuffer(RendererVk *renderer,
                                                      angle::FormatID formatID,
                                                      GLuint stride,
                                                      size_t offset)
{
    // The cache is searched linearly, comparing all three key fields.
    for (auto cached = mVertexConversionBuffers.begin(); cached != mVertexConversionBuffers.end();
         ++cached)
    {
        const bool keyMatches =
            cached->formatID == formatID && cached->stride == stride && cached->offset == offset;
        if (!keyMatches)
        {
            continue;
        }
        return &*cached;
    }

    // Cache miss: construct the new entry in place at the end of the vector.
    mVertexConversionBuffers.emplace_back(renderer, formatID, stride, offset);
    return &mVertexConversionBuffers.back();
}
void BufferVk::markConversionBuffersDirty()
{
for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
{
buffer.dirty = true;
}
}
} // namespace rx
......@@ -19,6 +19,27 @@ namespace rx
{
class RendererVk;
// Conversion buffers hold translated index and vertex data.
// One cached conversion result: a dynamic buffer holding the converted data plus the state
// needed to decide whether that data can be reused.
struct ConversionBuffer
{
// Creates the entry and initializes its dynamic buffer with the given usage flags, initial
// size and alignment.
ConversionBuffer(RendererVk *renderer,
VkBufferUsageFlags usageFlags,
size_t initialSize,
size_t alignment);
~ConversionBuffer();
// Move construction is declared so entries can be stored in containers such as std::vector.
ConversionBuffer(ConversionBuffer &&other);
// True when the source data must be converted again before this entry is used. Set by the
// constructor and by BufferVk::markConversionBuffersDirty; cleared after a conversion.
bool dirty;
// Offset written by the most recent allocation in |data|; used as the vertex buffer offset
// when the converted data is bound.
VkDeviceSize lastAllocationOffset;
// The conversion is stored in a dynamic buffer.
vk::DynamicBuffer data;
};
class BufferVk : public BufferImpl
{
public:
......@@ -83,15 +104,40 @@ class BufferVk : public BufferImpl
uint32_t copyCount,
const VkBufferCopy *copies);
ConversionBuffer *getVertexConversionBuffer(RendererVk *renderer,
angle::FormatID formatID,
GLuint stride,
size_t offset);
private:
angle::Result setDataImpl(ContextVk *contextVk,
const uint8_t *data,
size_t size,
size_t offset);
void release(ContextVk *context);
void markConversionBuffersDirty();
// A ConversionBuffer specialized for vertex data. It adds the key fields that
// getVertexConversionBuffer compares when searching the cache.
struct VertexConversionBuffer : public ConversionBuffer
{
// Creates an entry keyed by the given format, stride and offset.
VertexConversionBuffer(RendererVk *renderer,
angle::FormatID formatIDIn,
GLuint strideIn,
size_t offsetIn);
~VertexConversionBuffer();
// Move construction is required because entries live in a std::vector.
VertexConversionBuffer(VertexConversionBuffer &&other);
// The conversion is identified by the triple of {format, stride, offset}.
angle::FormatID formatID;
GLuint stride;
size_t offset;
};
vk::BufferHelper mBuffer;
VkAccessFlags mDataWriteAccessFlags;
// A cache of converted vertex data.
std::vector<VertexConversionBuffer> mVertexConversionBuffers;
};
} // namespace rx
......
......@@ -24,13 +24,8 @@ namespace rx
{
namespace
{
constexpr size_t kDynamicVertexDataSize = 1024 * 1024;
constexpr size_t kDynamicIndexDataSize = 1024 * 8;
constexpr size_t kMaxVertexFormatAlignment = 4;
constexpr VkBufferUsageFlags kVertexBufferUsageFlags =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
constexpr VkBufferUsageFlags kIndexBufferUsageFlags =
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
constexpr size_t kDynamicVertexDataSize = 1024 * 1024;
constexpr size_t kDynamicIndexDataSize = 1024 * 8;
ANGLE_INLINE bool BindingIsAligned(const gl::VertexBinding &binding,
const angle::Format &angleFormat,
......@@ -90,40 +85,16 @@ size_t GetVertexCount(BufferVk *srcBuffer, const gl::VertexBinding &binding, uin
}
} // anonymous namespace
#define INIT \
{ \
kVertexBufferUsageFlags, 1024 * 8, true \
}
VertexArrayVk::VertexArrayVk(ContextVk *contextVk, const gl::VertexArrayState &state)
: VertexArrayImpl(state),
mCurrentArrayBufferHandles{},
mCurrentArrayBufferOffsets{},
mCurrentArrayBuffers{},
mCurrentArrayBufferConversion{{
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
INIT,
}},
mCurrentArrayBufferConversionCanRelease{},
mCurrentElementArrayBufferOffset(0),
mCurrentElementArrayBuffer(nullptr),
mDynamicVertexData(kVertexBufferUsageFlags, kDynamicVertexDataSize, true),
mDynamicIndexData(kIndexBufferUsageFlags, kDynamicIndexDataSize, true),
mTranslatedByteIndexData(kIndexBufferUsageFlags, kDynamicIndexDataSize, true),
mDynamicVertexData(vk::kVertexBufferUsageFlags, kDynamicVertexDataSize, true),
mDynamicIndexData(vk::kIndexBufferUsageFlags, kDynamicIndexDataSize, true),
mTranslatedByteIndexData(vk::kIndexBufferUsageFlags, kDynamicIndexDataSize, true),
mLineLoopHelper(contextVk->getRenderer()),
mDirtyLineLoopTranslation(true)
{
......@@ -139,16 +110,12 @@ VertexArrayVk::VertexArrayVk(ContextVk *contextVk, const gl::VertexArrayState &s
mCurrentArrayBufferOffsets.fill(0);
mCurrentArrayBuffers.fill(&mTheNullBuffer);
for (vk::DynamicBuffer &buffer : mCurrentArrayBufferConversion)
{
buffer.init(kMaxVertexFormatAlignment, renderer);
}
mDynamicVertexData.init(kMaxVertexFormatAlignment, renderer);
mDynamicVertexData.init(vk::kVertexBufferAlignment, renderer);
// We use an alignment of four for index data. This ensures that compute shaders can read index
// elements from "uint" aligned addresses.
mDynamicIndexData.init(4, renderer);
mTranslatedByteIndexData.init(4, renderer);
mDynamicIndexData.init(vk::kIndexBufferAlignment, renderer);
mTranslatedByteIndexData.init(vk::kIndexBufferAlignment, renderer);
}
VertexArrayVk::~VertexArrayVk() {}
......@@ -159,10 +126,6 @@ void VertexArrayVk::destroy(const gl::Context *context)
mTheNullBuffer.release(contextVk);
for (vk::DynamicBuffer &buffer : mCurrentArrayBufferConversion)
{
buffer.release(contextVk);
}
mDynamicVertexData.release(contextVk);
mDynamicIndexData.release(contextVk);
mTranslatedByteIndexData.release(contextVk);
......@@ -239,7 +202,8 @@ angle::Result VertexArrayVk::convertVertexBufferGPU(ContextVk *contextVk,
BufferVk *srcBuffer,
const gl::VertexBinding &binding,
size_t attribIndex,
const vk::Format &vertexFormat)
const vk::Format &vertexFormat,
ConversionBuffer *conversion)
{
const angle::Format &srcFormat = vertexFormat.angleFormat();
const angle::Format &destFormat = vertexFormat.bufferFormat();
......@@ -255,15 +219,15 @@ angle::Result VertexArrayVk::convertVertexBufferGPU(ContextVk *contextVk,
return angle::Result::Continue;
}
ASSERT(GetVertexInputAlignment(vertexFormat) <= kMaxVertexFormatAlignment);
ASSERT(GetVertexInputAlignment(vertexFormat) <= vk::kVertexBufferAlignment);
// Allocate buffer for results
mCurrentArrayBufferConversion[attribIndex].releaseRetainedBuffers(contextVk);
ANGLE_TRY(mCurrentArrayBufferConversion[attribIndex].allocate(
contextVk, numVertices * destFormatSize, nullptr, nullptr,
&mCurrentArrayBufferOffsets[attribIndex], nullptr));
mCurrentArrayBuffers[attribIndex] =
mCurrentArrayBufferConversion[attribIndex].getCurrentBuffer();
conversion->data.releaseRetainedBuffers(contextVk);
ANGLE_TRY(conversion->data.allocate(contextVk, numVertices * destFormatSize, nullptr, nullptr,
&conversion->lastAllocationOffset, nullptr));
ASSERT(conversion->dirty);
conversion->dirty = false;
UtilsVk::ConvertVertexParameters params;
params.vertexCount = numVertices;
......@@ -271,14 +235,10 @@ angle::Result VertexArrayVk::convertVertexBufferGPU(ContextVk *contextVk,
params.destFormat = &destFormat;
params.srcStride = binding.getStride();
params.srcOffset = binding.getOffset();
params.destOffset = static_cast<size_t>(mCurrentArrayBufferOffsets[attribIndex]);
params.destOffset = static_cast<size_t>(conversion->lastAllocationOffset);
ANGLE_TRY(contextVk->getUtils().convertVertexBuffer(
contextVk, mCurrentArrayBuffers[attribIndex], &srcBuffer->getBuffer(), params));
mCurrentArrayBufferHandles[attribIndex] =
mCurrentArrayBuffers[attribIndex]->getBuffer().getHandle();
mCurrentArrayBufferConversionCanRelease[attribIndex] = true;
contextVk, conversion->data.getCurrentBuffer(), &srcBuffer->getBuffer(), params));
return angle::Result::Continue;
}
......@@ -287,7 +247,8 @@ angle::Result VertexArrayVk::convertVertexBufferCPU(ContextVk *contextVk,
BufferVk *srcBuffer,
const gl::VertexBinding &binding,
size_t attribIndex,
const vk::Format &vertexFormat)
const vk::Format &vertexFormat,
ConversionBuffer *conversion)
{
TRACE_EVENT0("gpu.angle", "VertexArrayVk::convertVertexBufferCpu");
// Needed before reading buffer or we could get stale data.
......@@ -296,7 +257,7 @@ angle::Result VertexArrayVk::convertVertexBufferCPU(ContextVk *contextVk,
unsigned srcFormatSize = vertexFormat.angleFormat().pixelBytes;
unsigned dstFormatSize = vertexFormat.bufferFormat().pixelBytes;
mCurrentArrayBufferConversion[attribIndex].releaseRetainedBuffers(contextVk);
conversion->data.releaseRetainedBuffers(contextVk);
size_t numVertices = GetVertexCount(srcBuffer, binding, srcFormatSize);
if (numVertices == 0)
......@@ -308,29 +269,19 @@ angle::Result VertexArrayVk::convertVertexBufferCPU(ContextVk *contextVk,
ANGLE_TRY(srcBuffer->mapImpl(contextVk, &src));
const uint8_t *srcBytes = reinterpret_cast<const uint8_t *>(src);
srcBytes += binding.getOffset();
ASSERT(GetVertexInputAlignment(vertexFormat) <= kMaxVertexFormatAlignment);
ANGLE_TRY(StreamVertexData(contextVk, &mCurrentArrayBufferConversion[attribIndex], srcBytes,
numVertices * dstFormatSize, 0, numVertices, binding.getStride(),
vertexFormat.vertexLoadFunction, &mCurrentArrayBuffers[attribIndex],
&mCurrentArrayBufferOffsets[attribIndex]));
ASSERT(GetVertexInputAlignment(vertexFormat) <= vk::kVertexBufferAlignment);
ANGLE_TRY(StreamVertexData(contextVk, &conversion->data, srcBytes, numVertices * dstFormatSize,
0, numVertices, binding.getStride(), vertexFormat.vertexLoadFunction,
&mCurrentArrayBuffers[attribIndex],
&conversion->lastAllocationOffset));
ANGLE_TRY(srcBuffer->unmapImpl(contextVk));
mCurrentArrayBufferHandles[attribIndex] =
mCurrentArrayBuffers[attribIndex]->getBuffer().getHandle();
mCurrentArrayBufferConversionCanRelease[attribIndex] = true;
ASSERT(conversion->dirty);
conversion->dirty = false;
return angle::Result::Continue;
}
// Releases the per-attribute conversion buffer for |attribIndex| if it has been flagged as
// releasable, then clears the flag so the buffer is not released twice.
ANGLE_INLINE void VertexArrayVk::ensureConversionReleased(ContextVk *contextVk, size_t attribIndex)
{
    // Nothing to do when no conversion is pending release for this attribute.
    if (!mCurrentArrayBufferConversionCanRelease[attribIndex])
    {
        return;
    }

    mCurrentArrayBufferConversion[attribIndex].release(contextVk);
    mCurrentArrayBufferConversionCanRelease[attribIndex] = false;
}
angle::Result VertexArrayVk::syncState(const gl::Context *context,
const gl::VertexArray::DirtyBits &dirtyBits,
gl::VertexArray::DirtyAttribBitsArray *attribBits,
......@@ -470,17 +421,28 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
{
stride = vertexFormat.bufferFormat().pixelBytes;
if (bindingIsAligned)
// This will require supporting relativeOffset in ES 3.1.
ConversionBuffer *conversion = bufferVk->getVertexConversionBuffer(
renderer, angleFormat.id, binding.getStride(), binding.getOffset());
if (conversion->dirty)
{
ANGLE_TRY(convertVertexBufferGPU(contextVk, bufferVk, binding, attribIndex,
vertexFormat));
anyVertexBufferConvertedOnGpu = true;
}
else
{
ANGLE_TRY(convertVertexBufferCPU(contextVk, bufferVk, binding, attribIndex,
vertexFormat));
if (bindingIsAligned)
{
ANGLE_TRY(convertVertexBufferGPU(contextVk, bufferVk, binding, attribIndex,
vertexFormat, conversion));
anyVertexBufferConvertedOnGpu = true;
}
else
{
ANGLE_TRY(convertVertexBufferCPU(contextVk, bufferVk, binding, attribIndex,
vertexFormat, conversion));
}
}
mCurrentArrayBuffers[attribIndex] = conversion->data.getCurrentBuffer();
mCurrentArrayBufferHandles[attribIndex] =
mCurrentArrayBuffers[attribIndex]->getBuffer().getHandle();
mCurrentArrayBufferOffsets[attribIndex] = conversion->lastAllocationOffset;
}
else
{
......@@ -500,8 +462,6 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
mCurrentArrayBufferOffsets[attribIndex] = binding.getOffset();
stride = binding.getStride();
}
ensureConversionReleased(contextVk, attribIndex);
}
}
else
......@@ -510,7 +470,6 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
mCurrentArrayBufferHandles[attribIndex] = mTheNullBuffer.getBuffer().getHandle();
mCurrentArrayBufferOffsets[attribIndex] = 0;
stride = vertexFormat.bufferFormat().pixelBytes;
ensureConversionReleased(contextVk, attribIndex);
}
contextVk->onVertexAttributeChange(attribIndex, stride, binding.getDivisor(),
......@@ -526,7 +485,6 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
mCurrentArrayBufferOffsets[attribIndex] = 0;
setDefaultPackedInput(contextVk, attribIndex);
ensureConversionReleased(contextVk, attribIndex);
}
if (anyVertexBufferConvertedOnGpu && renderer->getFeatures().flushAfterVertexConversion.enabled)
......@@ -571,7 +529,7 @@ angle::Result VertexArrayVk::updateClientAttribs(const gl::Context *context,
const vk::Format &vertexFormat = renderer->getFormat(GetVertexFormatID(attrib));
GLuint stride = vertexFormat.bufferFormat().pixelBytes;
ASSERT(GetVertexInputAlignment(vertexFormat) <= kMaxVertexFormatAlignment);
ASSERT(GetVertexInputAlignment(vertexFormat) <= vk::kVertexBufferAlignment);
const uint8_t *src = static_cast<const uint8_t *>(attrib.pointer);
if (binding.getDivisor() > 0)
......
......@@ -17,6 +17,7 @@
namespace rx
{
class BufferVk;
struct ConversionBuffer;
class VertexArrayVk : public VertexArrayImpl
{
......@@ -97,13 +98,14 @@ class VertexArrayVk : public VertexArrayImpl
BufferVk *srcBuffer,
const gl::VertexBinding &binding,
size_t attribIndex,
const vk::Format &vertexFormat);
const vk::Format &vertexFormat,
ConversionBuffer *conversion);
angle::Result convertVertexBufferCPU(ContextVk *contextVk,
BufferVk *srcBuffer,
const gl::VertexBinding &binding,
size_t attribIndex,
const vk::Format &vertexFormat);
void ensureConversionReleased(ContextVk *contextVk, size_t attribIndex);
const vk::Format &vertexFormat,
ConversionBuffer *conversion);
angle::Result syncDirtyAttrib(ContextVk *contextVk,
const gl::VertexAttribute &attrib,
......@@ -113,8 +115,6 @@ class VertexArrayVk : public VertexArrayImpl
gl::AttribArray<VkBuffer> mCurrentArrayBufferHandles;
gl::AttribArray<VkDeviceSize> mCurrentArrayBufferOffsets;
gl::AttribArray<vk::BufferHelper *> mCurrentArrayBuffers;
gl::AttribArray<vk::DynamicBuffer> mCurrentArrayBufferConversion;
gl::AttribArray<bool> mCurrentArrayBufferConversionCanRelease;
VkDeviceSize mCurrentElementArrayBufferOffset;
vk::BufferHelper *mCurrentElementArrayBuffer;
......
......@@ -21,6 +21,13 @@ namespace rx
{
namespace vk
{
// Usage flags shared by the dynamic vertex and index buffers. Both add the storage-buffer
// bit to the vertex/index bit.
// NOTE(review): the storage bit is presumably needed so compute-based conversion can read and
// write these buffers - confirm against UtilsVk.
constexpr VkBufferUsageFlags kVertexBufferUsageFlags =
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
constexpr VkBufferUsageFlags kIndexBufferUsageFlags =
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
// Allocation alignments, in bytes, passed to DynamicBuffer::init. An alignment of four for
// index data lets compute shaders read index elements from "uint"-aligned addresses.
constexpr size_t kVertexBufferAlignment = 4;
constexpr size_t kIndexBufferAlignment = 4;
// A dynamic buffer is conceptually an infinitely long buffer. Each time you write to the buffer,
// you will always write to a previously unused portion. After a series of writes, you must flush
// the buffer data to the device. Buffer lifetime currently assumes that each new allocation will
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment