Commit e9111885 by Shahbaz Youssefi Committed by Commit Bot

Vulkan: Optimize texture upload barriers

When flushing staged uploads to an image, a 64-wide bitfield is used to track subresources that are uploaded since the last barrier. If a collision is detected, a barrier is inserted and the bitfield is reset. If the image has more than 64 subresources, some subresources would map to the same bit and cause a few unnecessary barriers. Texture upload benchmarks show 5% to 10% improvement both in CPU and GPU time. Bug: angleproject:3347 Change-Id: I21dd23c78879bb01d58bc869ffc8bee06c79e6c1 Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1700147 Reviewed-by: Tobin Ehlis <tobine@google.com> Reviewed-by: Jamie Madill <jmadill@chromium.org> Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
parent 0efe516e
...@@ -1257,8 +1257,16 @@ inline constexpr unsigned int UnsignedCeilDivide(unsigned int value, unsigned in ...@@ -1257,8 +1257,16 @@ inline constexpr unsigned int UnsignedCeilDivide(unsigned int value, unsigned in
#if defined(_MSC_VER) #if defined(_MSC_VER)
# define ANGLE_ROTL(x, y) _rotl(x, y) # define ANGLE_ROTL(x, y) _rotl(x, y)
# define ANGLE_ROTL64(x, y) _rotl64(x, y)
# define ANGLE_ROTR16(x, y) _rotr16(x, y) # define ANGLE_ROTR16(x, y) _rotr16(x, y)
#elif defined(__clang__) && __has_builtin(__builtin_rotateleft32) && \
__has_builtin(__builtin_rotateleft64) && __has_builtin(__builtin_rotateright16)
# define ANGLE_ROTL(x, y) __builtin_rotateleft32(x, y)
# define ANGLE_ROTL64(x, y) __builtin_rotateleft64(x, y)
# define ANGLE_ROTR16(x, y) __builtin_rotateright16(x, y)
#else #else
// Software fallback for 32-bit rotate-left, used when neither the MSVC
// _rotl intrinsic nor the clang __builtin_rotateleft32 builtin is available.
inline uint32_t RotL(uint32_t x, int8_t r)
{
    // Mask both shift amounts to [0, 31]. The naive form
    // (x << r) | (x >> (32 - r)) shifts by 32 when r == 0, which is
    // undefined behavior for a 32-bit operand; the extra mask on the
    // right shift makes r == 0 well-defined and return x unchanged.
    const uint32_t shift = static_cast<uint32_t>(r) & 31u;
    return (x << shift) | (x >> ((32u - shift) & 31u));
}
// Software fallback for 64-bit rotate-left, used when neither the MSVC
// _rotl64 intrinsic nor the clang __builtin_rotateleft64 builtin is
// available.
inline uint64_t RotL64(uint64_t x, int8_t r)
{
    // Mask both shift amounts to [0, 63]. The caller in
    // ImageHelper::flushStagedUpdates computes the rotation as
    // (level * layerCount + layer) % 64, which can be 0; the naive
    // (x << r) | (x >> (64 - r)) would then shift by 64 — undefined
    // behavior. With the masks, r == 0 is well-defined and returns x.
    const uint32_t shift = static_cast<uint32_t>(r) & 63u;
    return (x << shift) | (x >> ((64u - shift) & 63u));
}
// Software fallback for 16-bit rotate-right, used when neither the MSVC
// _rotr16 intrinsic nor the clang __builtin_rotateright16 builtin is
// available.
inline uint16_t RotR16(uint16_t x, int8_t r)
{
    // Note: unlike the 32/64-bit rotates, r == 0 is safe here without
    // masking — x is promoted to int, so x << 16 is defined, and the
    // truncation back to uint16_t discards the high bits.
    const uint16_t lowBits  = static_cast<uint16_t>(x >> r);
    const uint16_t highBits = static_cast<uint16_t>(x << (16 - r));
    return static_cast<uint16_t>(lowBits | highBits);
}
# define ANGLE_ROTL(x, y) ::rx::RotL(x, y) # define ANGLE_ROTL(x, y) ::rx::RotL(x, y)
# define ANGLE_ROTL64(x, y) ::rx::RotL64(x, y)
# define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y) # define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)
#endif // namespace rx #endif // namespace rx
......
...@@ -1847,7 +1847,7 @@ void ImageHelper::clearColor(const VkClearColorValue &color, ...@@ -1847,7 +1847,7 @@ void ImageHelper::clearColor(const VkClearColorValue &color,
{ {
ASSERT(valid()); ASSERT(valid());
changeLayout(VK_IMAGE_ASPECT_COLOR_BIT, ImageLayout::TransferDst, commandBuffer); ASSERT(mCurrentLayout == ImageLayout::TransferDst);
VkImageSubresourceRange range = {}; VkImageSubresourceRange range = {};
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
...@@ -1870,7 +1870,7 @@ void ImageHelper::clearDepthStencil(VkImageAspectFlags imageAspectFlags, ...@@ -1870,7 +1870,7 @@ void ImageHelper::clearDepthStencil(VkImageAspectFlags imageAspectFlags,
{ {
ASSERT(valid()); ASSERT(valid());
changeLayout(imageAspectFlags, ImageLayout::TransferDst, commandBuffer); ASSERT(mCurrentLayout == ImageLayout::TransferDst);
VkImageSubresourceRange clearRange = { VkImageSubresourceRange clearRange = {
/*aspectMask*/ clearAspectFlags, /*aspectMask*/ clearAspectFlags,
...@@ -2444,6 +2444,16 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk, ...@@ -2444,6 +2444,16 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk,
std::vector<SubresourceUpdate> updatesToKeep; std::vector<SubresourceUpdate> updatesToKeep;
const VkImageAspectFlags aspectFlags = GetFormatAspectFlags(mFormat->imageFormat()); const VkImageAspectFlags aspectFlags = GetFormatAspectFlags(mFormat->imageFormat());
// Upload levels and layers that don't conflict in parallel. The (level, layer) pair is hashed
// to `(level * mLayerCount + layer) % 64` and used to track whether that subresource is
// currently in transfer. If so, a barrier is inserted. If mLayerCount * mLevelCount > 64,
// there will be a few unnecessary barriers.
constexpr uint32_t kMaxParallelSubresourceUpload = 64;
uint64_t subresourceUploadsInProgress = 0;
// Start in TransferDst.
changeLayout(aspectFlags, vk::ImageLayout::TransferDst, commandBuffer);
for (SubresourceUpdate &update : mSubresourceUpdates) for (SubresourceUpdate &update : mSubresourceUpdates)
{ {
ASSERT(update.updateSource == SubresourceUpdate::UpdateSource::Clear || ASSERT(update.updateSource == SubresourceUpdate::UpdateSource::Clear ||
...@@ -2471,6 +2481,7 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk, ...@@ -2471,6 +2481,7 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk,
updateMipLevel = dstSubresource.mipLevel; updateMipLevel = dstSubresource.mipLevel;
updateBaseLayer = dstSubresource.baseArrayLayer; updateBaseLayer = dstSubresource.baseArrayLayer;
updateLayerCount = dstSubresource.layerCount; updateLayerCount = dstSubresource.layerCount;
ASSERT(updateLayerCount != static_cast<uint32_t>(gl::ImageIndex::kEntireLevel));
} }
// If the update level is not within the requested range, skip the update. // If the update level is not within the requested range, skip the update.
...@@ -2486,10 +2497,28 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk, ...@@ -2486,10 +2497,28 @@ angle::Result ImageHelper::flushStagedUpdates(ContextVk *contextVk,
continue; continue;
} }
// Conservatively add a barrier between every update. This is to avoid races when updating if (updateLayerCount >= kMaxParallelSubresourceUpload)
// the same subresource. A possible optimization could be to only issue this barrier when {
// an overlap in updates is observed. // If there are more subresources than bits we can track, always insert a barrier.
changeLayout(aspectFlags, vk::ImageLayout::TransferDst, commandBuffer); changeLayout(aspectFlags, vk::ImageLayout::TransferDst, commandBuffer);
subresourceUploadsInProgress = std::numeric_limits<uint64_t>::max();
}
else
{
const uint64_t subresourceHashRange = angle::Bit<uint64_t>(updateLayerCount) - 1;
const uint32_t subresourceHashOffset =
(updateMipLevel * mLayerCount + updateBaseLayer) % kMaxParallelSubresourceUpload;
const uint64_t subresourceHash =
ANGLE_ROTL64(subresourceHashRange, subresourceHashOffset);
if ((subresourceUploadsInProgress & subresourceHash) != 0)
{
// If there's overlap in subresource upload, issue a barrier.
changeLayout(aspectFlags, vk::ImageLayout::TransferDst, commandBuffer);
subresourceUploadsInProgress = 0;
}
subresourceUploadsInProgress |= subresourceHash;
}
if (update.updateSource == SubresourceUpdate::UpdateSource::Clear) if (update.updateSource == SubresourceUpdate::UpdateSource::Clear)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment