Vulkan: Optimize memory allocation offset rounding

Use a bit operation to compute the memory alignment round-up.

Tested with T-Rex; the hotspot in roundUp is resolved.

Bug: angleproject:3744
Change-Id: I55db941f588401a1d2d3d93f4d6b810e6b1aa95f
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1725118
Commit-Queue: Jiacheng Lu <lujc@google.com>
Reviewed-by: Tim Van Patten <timvp@google.com>

Vulkan: Optimize memory allocation offset rounding
3e493c48 · Jiacheng Lu · Commit Bot · 34ebecc1 · 3e493c48 · 3e493c48
Commit 3e493c48, authored Jul 29, 2019 by Jiacheng Lu; committed by Commit Bot on Jul 31, 2019
5 changed files
--- a/src/common/PoolAlloc.cpp
+++ b/src/common/PoolAlloc.cpp
@@ -65,7 +65,7 @@ PoolAllocator::PoolAllocator(int growthIncrement, int allocationAlignment)
        mHeaderSkip = minAlign;
        if (mHeaderSkip < sizeof(Header))
        {
-            mHeaderSkip = rx::roundUp(sizeof(Header), mAlignment);
+            mHeaderSkip = rx::roundUpPow2(sizeof(Header), mAlignment);
        }
    }
    //
@@ -331,4 +331,4 @@ void Allocation::checkAllocList() const
        alloc->check();
 }
 }  // namespace angle
\ No newline at end of file
--- a/src/common/mathutil.h
+++ b/src/common/mathutil.h
@@ -34,7 +34,7 @@ const unsigned int Float32One   = 0x3F800000;
 const unsigned short Float16One = 0x3C00;
 template <typename T>
-inline bool isPow2(T x)
+inline constexpr bool isPow2(T x)
 {
    static_assert(std::is_integral<T>::value, "isPow2 must be called on an integer type.");
    return (x & (x - 1)) == 0 && (x != 0);
@@ -1249,6 +1249,13 @@ T roundUp(const T value, const T alignment)
 }
 template <typename T>
+constexpr T roundUpPow2(const T value, const T alignment)
+{
+    ASSERT(gl::isPow2(alignment));
+    return (value + alignment - 1) & ~(alignment - 1);
+}
+template <typename T>
 angle::CheckedNumeric<T> CheckedRoundUp(const T value, const T alignment)
 {
    angle::CheckedNumeric<T> checkedValue(value);

--- a/src/libANGLE/renderer/d3d/VertexBuffer.cpp
+++ b/src/libANGLE/renderer/d3d/VertexBuffer.cpp
@@ -103,7 +103,7 @@ angle::Result VertexBufferInterface::getSpaceRequired(const gl::Context *context
                                               &spaceRequired));
    // Align to 16-byte boundary
-    unsigned int alignedSpaceRequired = roundUp(spaceRequired, 16u);
+    unsigned int alignedSpaceRequired = roundUpPow2(spaceRequired, 16u);
    ANGLE_CHECK_GL_ALLOC(GetImplAs<ContextD3D>(context), alignedSpaceRequired >= spaceRequired);
    *spaceInBytesOut = alignedSpaceRequired;

--- a/src/libANGLE/renderer/vulkan/BufferVk.cpp
+++ b/src/libANGLE/renderer/vulkan/BufferVk.cpp
@@ -26,6 +26,7 @@ namespace
 // On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to read the
 // last n bytes.  By rounding up the buffer sizes to a multiple of 4, the problem is alleviated.
 constexpr size_t kBufferSizeGranularity = 4;
+static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");
 // Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
 constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;
@@ -109,7 +110,7 @@ angle::Result BufferVk::setData(const gl::Context *context,
        VkBufferCreateInfo createInfo    = {};
        createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
        createInfo.flags                 = 0;
-        createInfo.size                  = roundUp(size, kBufferSizeGranularity);
+        createInfo.size                  = roundUpPow2(size, kBufferSizeGranularity);
        createInfo.usage                 = usageFlags;
        createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
        createInfo.queueFamilyIndexCount = 0;

--- a/src/libANGLE/renderer/vulkan/vk_helpers.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
@@ -343,7 +343,7 @@ angle::Result DynamicBuffer::allocate(ContextVk *contextVk,
                                      VkDeviceSize *offsetOut,
                                      bool *newBufferAllocatedOut)
 {
-    size_t sizeToAllocate = roundUp(sizeInBytes, mAlignment);
+    size_t sizeToAllocate = roundUpPow2(sizeInBytes, mAlignment);
    angle::base::CheckedNumeric<size_t> checkedNextWriteOffset = mNextAllocationOffset;
    checkedNextWriteOffset += sizeToAllocate;
@@ -563,11 +563,13 @@ void DynamicBuffer::updateAlignment(RendererVk *renderer, size_t alignment)
    // be used instead.
    ASSERT(alignment % atomSize == 0 || atomSize % alignment == 0);
    alignment = std::max(alignment, atomSize);
+    ASSERT(gl::isPow2(alignment));
    // If alignment has changed, make sure the next allocation is done at an aligned offset.
    if (alignment != mAlignment)
    {
-        mNextAllocationOffset = roundUp(mNextAllocationOffset, static_cast<uint32_t>(alignment));
+        mNextAllocationOffset =
+            roundUpPow2(mNextAllocationOffset, static_cast<uint32_t>(alignment));
    }
    mAlignment = alignment;